clang 15.0.7
CGBuiltin.cpp
//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "ABIInfo.h"
#include "CGCUDARuntime.h"
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
#include "PatternInit.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OSLog.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/IR/IntrinsicsVE.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MatrixBuilder.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/X86TargetParser.h"
#include <sstream>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

static
int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
  return std::min(High, std::max(Low, Value));
}

static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
                             Align AlignmentInBytes) {
  ConstantInt *Byte;
  switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
  case LangOptions::TrivialAutoVarInitKind::Uninitialized:
    // Nothing to initialize.
    return;
  case LangOptions::TrivialAutoVarInitKind::Zero:
    Byte = CGF.Builder.getInt8(0x00);
    break;
  case LangOptions::TrivialAutoVarInitKind::Pattern: {
    llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
    Byte = llvm::dyn_cast<llvm::ConstantInt>(
        initializationPatternFor(CGF.CGM, Int8));
    break;
  }
  }
  if (CGF.CGM.stopAutoInit())
    return;
  auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
  I->addAnnotationMetadata("auto-init");
}
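
// Illustrative note (not in the upstream source): with
// -ftrivial-auto-var-init=pattern, initializationPatternFor() supplies a
// repeated filler byte (0xAA on most targets), so for
//   void f() { char buf[64]; /* ... */ }
// the alloca for buf is filled by a memset of that byte, annotated
// "auto-init"; with -ftrivial-auto-var-init=zero the filler is 0x00.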

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                     unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // TODO: This list should be expanded or refactored after all GCC-compatible
  // std libcall builtins are implemented.
  static SmallDenseMap<unsigned, StringRef, 8> F128Builtins{
      {Builtin::BI__builtin_printf, "__printfieee128"},
      {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
      {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
      {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
      {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
      {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
      {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
  };

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else {
    // TODO: This mutation should also be applied to targets other than PPC,
    // after the backend supports IEEE 128-bit style libcalls.
    if (getTriple().isPPC64() &&
        &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
        F128Builtins.find(BuiltinID) != F128Builtins.end())
      Name = F128Builtins[BuiltinID];
    else
      Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
  }

  llvm::FunctionType *Ty =
      cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}
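
// Illustrative example (not in the upstream source): for __builtin_fabsf,
// BuiltinInfo.getName() returns "__builtin_fabsf", and skipping the
// 10-character "__builtin_" prefix leaves "fabsf", the library function that
// GetOrCreateLLVMFunction resolves or declares. On PPC64 targets whose long
// double is IEEE quad, __builtin_printf maps to "__printfieee128" through
// F128Builtins instead.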

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static Value *MakeBinaryAtomicValue(
    CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {

  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      T, E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
      llvm::IntegerType::get(CGF.getLLVMContext(),
                             CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], Ordering);
  return EmitFromInt(CGF, Result, T, ValueType);
}
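
// Illustrative sketch (not in the upstream source): for a call such as
//   int x; __sync_fetch_and_add(&x, 5);
// MakeBinaryAtomicValue emits roughly
//   %old = atomicrmw add i32* %x, i32 5 seq_cst
// and EmitFromInt converts %old back to the expression's result type
// (an inttoptr for pointer results).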

static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
  Value *Address = CGF.EmitScalarExpr(E->getArg(1));

  // Convert the type of the pointer to a pointer to the stored type.
  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
  unsigned SrcAddrSpace = Address->getType()->getPointerAddressSpace();
  Value *BC = CGF.Builder.CreateBitCast(
      Address, llvm::PointerType::get(Val->getType(), SrcAddrSpace), "cast");
  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
  LV.setNontemporal(true);
  CGF.EmitStoreOfScalar(Val, LV, false);
  return nullptr;
}

static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Address = CGF.EmitScalarExpr(E->getArg(0));

  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
  LV.setNontemporal(true);
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
}
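
// Illustrative sketch (not in the upstream source): for
//   __builtin_nontemporal_store(v, p);
// the setNontemporal(true) flag makes EmitStoreOfScalar attach !nontemporal
// metadata, producing roughly
//   store i32 %v, i32* %p, !nontemporal !0
// which lets backends select streaming stores (e.g. MOVNT on x86).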

static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      T, E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
      llvm::IntegerType::get(CGF.getLLVMContext(),
                             CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  if (Invert)
    Result =
        CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                llvm::ConstantInt::getAllOnesValue(IntType));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}
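
// Illustrative sketch (not in the upstream source): atomicrmw returns the
// *old* value, so for __sync_add_and_fetch(&x, v) this helper re-applies the
// operation (old + v) to produce the *new* value. The Invert flag handles
// __sync_nand_and_fetch: atomicrmw nand returns old, the And binop yields
// old & v, and the final Xor with all-ones gives ~(old & v).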

/// Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E   Builtin call expression to convert to cmpxchg.
///            arg0 - address to operate on
///            arg1 - value to compare with
///            arg2 - new value
/// @param ReturnBool Specifies whether to return success flag of
///        cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
///
/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics
/// invoke the function EmitAtomicCmpXchgForMSIntrin.
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
                                     bool ReturnBool) {
  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  Value *Args[3];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);

  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
      Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
      llvm::AtomicOrdering::SequentiallyConsistent);
  if (ReturnBool)
    // Extract boolean success flag and zext it to int.
    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
                                  CGF.ConvertType(E->getType()));
  else
    // Extract old value and emit it using the same type as compare value.
    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
                       ValueType);
}
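
// Illustrative sketch (not in the upstream source):
//   __sync_bool_compare_and_swap(p, cmp, new)  -> ReturnBool = true:
//     lowers to cmpxchg and returns the success bit zext'ed to int;
//   __sync_val_compare_and_swap(p, cmp, new)   -> ReturnBool = false:
//     returns the old value loaded from *p, whether or not the swap hit.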

/// This function should be invoked to emit atomic cmpxchg for Microsoft's
/// _InterlockedCompareExchange* intrinsics which have the following signature:
/// T _InterlockedCompareExchange(T volatile *Destination,
///                               T Exchange,
///                               T Comparand);
///
/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
/// cmpxchg *Destination, Comparand, Exchange.
/// So we need to swap Comparand and Exchange when invoking
/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
/// function MakeAtomicCmpXchgValue since it expects the arguments to be
/// already swapped.

static
Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      E->getType(), E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
                                                 E->getArg(1)->getType()));
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
                                                 E->getArg(2)->getType()));

  auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
  auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
  auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));

  // For Release ordering, the failure ordering should be Monotonic.
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
                         AtomicOrdering::Monotonic :
                         SuccessOrdering;

  // The atomic instruction is marked volatile for consistency with MSVC. This
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
  // _Interlocked* operations in the future, we will have to remove the volatile
  // marker.
  auto *Result = CGF.Builder.CreateAtomicCmpXchg(
                   Destination, Comparand, Exchange,
                   SuccessOrdering, FailureOrdering);
  Result->setVolatile(true);
  return CGF.Builder.CreateExtractValue(Result, 0);
}
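
// Illustrative sketch (not in the upstream source): for
//   _InterlockedCompareExchange(Dest, Exchange, Comparand)
// the emitted instruction is roughly
//   %pair = cmpxchg volatile i32* %Dest, i32 %Comparand, i32 %Exchange
//           seq_cst seq_cst
// i.e. the operand order is swapped relative to the MSVC prototype, and the
// old value (extractvalue %pair, 0) is returned.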

// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
// prototyped like this:
//
// unsigned char _InterlockedCompareExchange128...(
//     __int64 volatile * _Destination,
//     __int64 _ExchangeHigh,
//     __int64 _ExchangeLow,
//     __int64 * _ComparandResult);
static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
                                              const CallExpr *E,
                                              AtomicOrdering SuccessOrdering) {
  assert(E->getNumArgs() == 4);
  llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
  llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3));

  assert(Destination->getType()->isPointerTy());
  assert(!ExchangeHigh->getType()->isPointerTy());
  assert(!ExchangeLow->getType()->isPointerTy());
  assert(ComparandPtr->getType()->isPointerTy());

  // For Release ordering, the failure ordering should be Monotonic.
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
                             ? AtomicOrdering::Monotonic
                             : SuccessOrdering;

  // Convert to i128 pointers and values.
  llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
  llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
  Destination = CGF.Builder.CreateBitCast(Destination, Int128PtrTy);
  Address ComparandResult(CGF.Builder.CreateBitCast(ComparandPtr, Int128PtrTy),
                          Int128Ty, CGF.getContext().toCharUnitsFromBits(128));

  // (((i128)hi) << 64) | ((i128)lo)
  ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
  ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
  ExchangeHigh =
      CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
  llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);

  // Load the comparand for the instruction.
  llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult);

  auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
                                              SuccessOrdering, FailureOrdering);

  // The atomic instruction is marked volatile for consistency with MSVC. This
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
  // _Interlocked* operations in the future, we will have to remove the volatile
  // marker.
  CXI->setVolatile(true);

  // Store the result as an outparameter.
  CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
                          ComparandResult);

  // Get the success boolean and zero extend it to i8.
  Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
  return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
}

static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());

  auto *IntTy = CGF.ConvertType(E->getType());
  auto *Result = CGF.Builder.CreateAtomicRMW(
                   AtomicRMWInst::Add,
                   CGF.EmitScalarExpr(E->getArg(0)),
                   ConstantInt::get(IntTy, 1),
                   Ordering);
  return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
}

static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());

  auto *IntTy = CGF.ConvertType(E->getType());
  auto *Result = CGF.Builder.CreateAtomicRMW(
                   AtomicRMWInst::Sub,
                   CGF.EmitScalarExpr(E->getArg(0)),
                   ConstantInt::get(IntTy, 1),
                   Ordering);
  return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
}
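
// Illustrative note (not in the upstream source): _InterlockedIncrement and
// _InterlockedDecrement return the *new* value, but atomicrmw add/sub return
// the value *before* the operation; hence the trailing CreateAdd/CreateSub
// of 1. E.g. if *p was 41, an increment emits atomicrmw add (yielding 41)
// and then returns 41 + 1 = 42.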

// Build a plain volatile load.
static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
  llvm::Type *ITy =
      llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
  Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo());
  llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
  Load->setVolatile(true);
  return Load;
}

// Build a plain volatile store.
static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  Value *Value = CGF.EmitScalarExpr(E->getArg(1));
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
  llvm::Type *ITy =
      llvm::IntegerType::get(CGF.getLLVMContext(), StoreSize.getQuantity() * 8);
  Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo());
  llvm::StoreInst *Store =
      CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
  Store->setVolatile(true);
  return Store;
}
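
// Illustrative sketch (not in the upstream source): for
//   __iso_volatile_load32(p);
// the pointee size (4 bytes) picks an i32, and the helper emits an aligned
// load that is always marked volatile, regardless of any other volatile
// handling mode:
//   %v = load volatile i32, i32* %p, align 4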

// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type. Depending on mode, this may be a constrained
// floating-point intrinsic.
static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E, unsigned IntrinsicID,
                                unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, Src0);
  }
}

// Emit an intrinsic that has 2 operands of the same type as its result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E, unsigned IntrinsicID,
                                unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, { Src0, Src1 });
  }
}

// Emit an intrinsic that has 3 operands of the same type as its result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E, unsigned IntrinsicID,
                                 unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
  }
}

// Emit an intrinsic where all operands are of the same type as the result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                unsigned IntrinsicID,
                                                unsigned ConstrainedIntrinsicID,
                                                llvm::Type *Ty,
                                                ArrayRef<Value *> Args) {
  Function *F;
  if (CGF.Builder.getIsFPConstrained())
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
  else
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);

  if (CGF.Builder.getIsFPConstrained())
    return CGF.Builder.CreateConstrainedFPCall(F, Args);
  else
    return CGF.Builder.CreateCall(F, Args);
}
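
// Illustrative note (not in the upstream source): when strict floating point
// is in effect (e.g. under #pragma STDC FENV_ACCESS ON or -ffp-model=strict),
// getIsFPConstrained() is true and a builtin like __builtin_sqrt(x) lowers to
// the constrained variant, roughly
//   call double @llvm.experimental.constrained.sqrt.f64(double %x,
//        metadata !"round.dynamic", metadata !"fpexcept.strict")
// instead of the plain @llvm.sqrt.f64.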

// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                               unsigned IntrinsicID,
                               llvm::StringRef Name = "") {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, Src0, Name);
}

// Emit an intrinsic that has 2 operands of the same type as its result.
static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E,
                                unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1 });
}

// Emit an intrinsic that has 3 operands of the same type as its result.
static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E,
                                 unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

// Emit an intrinsic that has overloaded integer result and fp operand.
static Value *
emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                                        unsigned IntrinsicID,
                                        unsigned ConstrainedIntrinsicID) {
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                       {ResultType, Src0->getType()});
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
  } else {
    Function *F =
        CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
    return CGF.Builder.CreateCall(F, Src0);
  }
}

/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
  Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
  Call->setDoesNotAccessMemory();
  return Call;
}

/// Emit the computation of the sign bit for a floating point value. Returns
/// the i1 sign bit value.
static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
  LLVMContext &C = CGF.CGM.getLLVMContext();

  llvm::Type *Ty = V->getType();
  int Width = Ty->getPrimitiveSizeInBits();
  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
  V = CGF.Builder.CreateBitCast(V, IntTy);
  if (Ty->isPPC_FP128Ty()) {
    // We want the sign bit of the higher-order double. The bitcast we just
    // did works as if the double-double was stored to memory and then
    // read as an i128. The "store" will put the higher-order double in the
    // lower address in both little- and big-Endian modes, but the "load"
    // will treat those bits as a different part of the i128: the low bits in
    // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
    // we need to shift the high bits down to the low before truncating.
    Width >>= 1;
    if (CGF.getTarget().isBigEndian()) {
      Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
      V = CGF.Builder.CreateLShr(V, ShiftCst);
    }
    // We are truncating value in order to extract the higher-order
    // double, which we will be using to extract the sign from.
    IntTy = llvm::IntegerType::get(C, Width);
    V = CGF.Builder.CreateTrunc(V, IntTy);
  }
  Value *Zero = llvm::Constant::getNullValue(IntTy);
  return CGF.Builder.CreateICmpSLT(V, Zero);
}
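
// Illustrative note (not in the upstream source): for a double argument the
// helper bitcasts to i64 and tests the sign with
//   %bits = bitcast double %v to i64
//   %sign = icmp slt i64 %bits, 0
// since IEEE formats keep the sign in the most significant bit. The
// ppc_fp128 path above first isolates the higher-order double of the pair.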

static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
                              const CallExpr *E, llvm::Constant *calleeValue) {
  CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
}

/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
/// \arg X The first argument to the llvm.*.with.overflow.*.
/// \arg Y The second argument to the llvm.*.with.overflow.*.
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
/// \returns The result (i.e. sum/product) returned by the intrinsic.
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
                                          const llvm::Intrinsic::ID IntrinsicID,
                                          llvm::Value *X, llvm::Value *Y,
                                          llvm::Value *&Carry) {
  // Make sure we have integers of the same width.
  assert(X->getType() == Y->getType() &&
         "Arguments must be the same type. (Did you forget to make sure both "
         "arguments have the same integer width?)");

  Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
  return CGF.Builder.CreateExtractValue(Tmp, 0);
}
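
// Illustrative sketch (not in the upstream source): with
// IntrinsicID = llvm.uadd.with.overflow on i32 operands, the call returns a
// {i32, i1} pair, e.g.
//   %res = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
// Carry receives the extracted i1 overflow flag and the i32 sum is returned.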

static Value *emitRangedBuiltin(CodeGenFunction &CGF,
                                unsigned IntrinsicID,
                                int low, int high) {
  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
  return Call;
}

namespace {
  struct WidthAndSignedness {
    unsigned Width;
    bool Signed;
  };
}

static WidthAndSignedness
getIntegerWidthAndSignedness(const clang::ASTContext &context,
                             const clang::QualType Type) {
  assert(Type->isIntegerType() && "Given type is not an integer.");
  unsigned Width = Type->isBooleanType()  ? 1
                   : Type->isBitIntType() ? context.getIntWidth(Type)
                                          : context.getTypeInfo(Type).Width;
  bool Signed = Type->isSignedIntegerType();
  return {Width, Signed};
}

// Given one or more integer types, this function produces an integer type that
// encompasses them: any value in one of the given types could be expressed in
// the encompassing type.
static struct WidthAndSignedness
EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
  assert(Types.size() > 0 && "Empty list of types.");

  // If any of the given types is signed, we must return a signed type.
  bool Signed = false;
  for (const auto &Type : Types) {
    Signed |= Type.Signed;
  }

  // The encompassing type must have a width greater than or equal to the width
  // of the specified types. Additionally, if the encompassing type is signed,
  // its width must be strictly greater than the width of any unsigned types
  // given.
  unsigned Width = 0;
  for (const auto &Type : Types) {
    unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
    if (Width < MinWidth) {
      Width = MinWidth;
    }
  }

  return {Width, Signed};
}
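
// Illustrative worked example (not in the upstream source): for inputs
// {Width=32, unsigned} and {Width=16, signed}, the result must be signed
// (one input is signed), and the unsigned 32-bit input then needs one extra
// bit, so EncompassingIntegerType returns {Width=33, signed}. For all-unsigned
// inputs it simply returns the maximum width, unsigned.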

Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
  llvm::Type *DestType = Int8PtrTy;
  if (ArgValue->getType() != DestType)
    ArgValue =
        Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());

  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
}

/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
}

static llvm::Value *
getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
}

llvm::Value *
CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
                                                 llvm::IntegerType *ResType,
                                                 llvm::Value *EmittedE,
                                                 bool IsDynamic) {
  uint64_t ObjectSize;
  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
    return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
}

/// Returns a Value corresponding to the size of the given expression.
/// This Value may be either of the following:
///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
///     it)
///   - A call to the @llvm.objectsize intrinsic
///
/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
/// and we wouldn't otherwise try to reference a pass_object_size parameter,
/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
llvm::Value *
CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
                                       llvm::IntegerType *ResType,
                                       llvm::Value *EmittedE, bool IsDynamic) {
  // We need to reference an argument if the pointer is a parameter with the
  // pass_object_size attribute.
  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
    auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
    auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
    if (Param != nullptr && PS != nullptr &&
        areBOSTypesCompatible(PS->getType(), Type)) {
      auto Iter = SizeArguments.find(Param);
      assert(Iter != SizeArguments.end());

      const ImplicitParamDecl *D = Iter->second;
      auto DIter = LocalDeclMap.find(D);
      assert(DIter != LocalDeclMap.end());

      return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
                              getContext().getSizeType(), E->getBeginLoc());
    }
  }

  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
  // evaluate E for side-effects. In either case, we shouldn't lower to
  // @llvm.objectsize.
  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
    return getDefaultBuiltinObjectSizeResult(Type, ResType);

  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
  assert(Ptr->getType()->isPointerTy() &&
         "Non-pointer passed to __builtin_object_size?");

  Function *F =
      CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});

  // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
  Value *Min = Builder.getInt1((Type & 2) != 0);
  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
  Value *NullIsUnknown = Builder.getTrue();
  Value *Dynamic = Builder.getInt1(IsDynamic);
  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
}
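
// Illustrative sketch (not in the upstream source): for
//   size_t n = __builtin_object_size(p, 0);
// with no pass_object_size parameter in scope, the emitted call is roughly
//   %n = call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false, i1 true, i1 false)
// where the flags are Min (Type & 2), NullIsUnknown (always true for GCC
// compatibility) and Dynamic (true only for __builtin_dynamic_object_size).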

namespace {
/// A struct to generically describe a bit test intrinsic.
struct BitTest {
  enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
  enum InterlockingKind : uint8_t {
    Unlocked,
    Sequential,
    Acquire,
    Release,
    NoFence
  };

  ActionKind Action;
  InterlockingKind Interlocking;
  bool Is64Bit;

  static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};
} // namespace

BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
  switch (BuiltinID) {
    // Main portable variants.
  case Builtin::BI_bittest:
    return {TestOnly, Unlocked, false};
  case Builtin::BI_bittestandcomplement:
    return {Complement, Unlocked, false};
  case Builtin::BI_bittestandreset:
    return {Reset, Unlocked, false};
  case Builtin::BI_bittestandset:
    return {Set, Unlocked, false};
  case Builtin::BI_interlockedbittestandreset:
    return {Reset, Sequential, false};
  case Builtin::BI_interlockedbittestandset:
    return {Set, Sequential, false};

    // X86-specific 64-bit variants.
  case Builtin::BI_bittest64:
    return {TestOnly, Unlocked, true};
  case Builtin::BI_bittestandcomplement64:
    return {Complement, Unlocked, true};
  case Builtin::BI_bittestandreset64:
    return {Reset, Unlocked, true};
  case Builtin::BI_bittestandset64:
    return {Set, Unlocked, true};
  case Builtin::BI_interlockedbittestandreset64:
    return {Reset, Sequential, true};
  case Builtin::BI_interlockedbittestandset64:
    return {Set, Sequential, true};

    // ARM/AArch64-specific ordering variants.
  case Builtin::BI_interlockedbittestandset_acq:
    return {Set, Acquire, false};
  case Builtin::BI_interlockedbittestandset_rel:
    return {Set, Release, false};
  case Builtin::BI_interlockedbittestandset_nf:
    return {Set, NoFence, false};
  case Builtin::BI_interlockedbittestandreset_acq:
    return {Reset, Acquire, false};
  case Builtin::BI_interlockedbittestandreset_rel:
    return {Reset, Release, false};
  case Builtin::BI_interlockedbittestandreset_nf:
    return {Reset, NoFence, false};
  }
  llvm_unreachable("expected only bittest intrinsics");
}

static char bitActionToX86BTCode(BitTest::ActionKind A) {
  switch (A) {
  case BitTest::TestOnly:   return '\0';
  case BitTest::Complement: return 'c';
  case BitTest::Reset:      return 'r';
  case BitTest::Set:        return 's';
  }
  llvm_unreachable("invalid action");
}

static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
                                            BitTest BT,
                                            const CallExpr *E, Value *BitBase,
                                            Value *BitPos) {
  char Action = bitActionToX86BTCode(BT.Action);
  char SizeSuffix = BT.Is64Bit ? 'q' : 'l';

  // Build the assembly.
  SmallString<64> Asm;
  raw_svector_ostream AsmOS(Asm);
  if (BT.Interlocking != BitTest::Unlocked)
    AsmOS << "lock ";
  AsmOS << "bt";
  if (Action)
    AsmOS << Action;
  AsmOS << SizeSuffix << " $2, ($1)";

  // Build the constraints. FIXME: We should support immediates when possible.
  std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
  std::string MachineClobbers = CGF.getTarget().getClobbers();
  if (!MachineClobbers.empty()) {
    Constraints += ',';
    Constraints += MachineClobbers;
  }
  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(),
      CGF.getContext().getTypeSize(E->getArg(1)->getType()));
  llvm::Type *IntPtrType = IntType->getPointerTo();
  llvm::FunctionType *FTy =
      llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);

  llvm::InlineAsm *IA =
      llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
}
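
// Illustrative example (not in the upstream source): for
//   _interlockedbittestandset(&bits, 5);
// Action = 's' and SizeSuffix = 'l', so the inline asm string becomes
//   "lock btsl $2, ($1)"
// with constraints "={@ccc},r,r,~{cc},~{memory}": the result is the carry
// flag (the bit's previous value), returned as an i8.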

static llvm::AtomicOrdering
getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
  switch (I) {
  case BitTest::Unlocked:   return llvm::AtomicOrdering::NotAtomic;
  case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
  case BitTest::Acquire:    return llvm::AtomicOrdering::Acquire;
  case BitTest::Release:    return llvm::AtomicOrdering::Release;
  case BitTest::NoFence:    return llvm::AtomicOrdering::Monotonic;
  }
  llvm_unreachable("invalid interlocking");
}

/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
/// bits and a bit position and read and optionally modify the bit at that
/// position. The position index can be arbitrarily large, i.e. it can be larger
/// than 31 or 63, so we need an indexed load in the general case.
static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
                                         unsigned BuiltinID,
                                         const CallExpr *E) {
  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));

  BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);

  // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
  // indexing operation internally. Use them if possible.
  if (CGF.getTarget().getTriple().isX86())
    return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);

  // Otherwise, use generic code to load one byte and test the bit. Use all but
  // the bottom three bits as the array index, and the bottom three bits to form
  // a mask.
  // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
  Value *ByteIndex = CGF.Builder.CreateAShr(
      BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
  Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
  Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
                                                 ByteIndex, "bittest.byteaddr"),
                   CGF.Int8Ty, CharUnits::One());
  Value *PosLow =
      CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
                            llvm::ConstantInt::get(CGF.Int8Ty, 0x7));

  // The updating instructions will need a mask.
  Value *Mask = nullptr;
  if (BT.Action != BitTest::TestOnly) {
    Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
                                 "bittest.mask");
  }

  // Check the action and ordering of the interlocked intrinsics.
  llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);

  Value *OldByte = nullptr;
  if (Ordering != llvm::AtomicOrdering::NotAtomic) {
    // Emit a combined atomicrmw load/store operation for the interlocked
    // intrinsics.
    llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
    if (BT.Action == BitTest::Reset) {
      Mask = CGF.Builder.CreateNot(Mask);
      RMWOp = llvm::AtomicRMWInst::And;
    }
    OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
                                          Ordering);
  } else {
    // Emit a plain load for the non-interlocked intrinsics.
    OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
    Value *NewByte = nullptr;
    switch (BT.Action) {
    case BitTest::TestOnly:
      // Don't store anything.
      break;
    case BitTest::Complement:
      NewByte = CGF.Builder.CreateXor(OldByte, Mask);
      break;
    case BitTest::Reset:
      NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
      break;
    case BitTest::Set:
      NewByte = CGF.Builder.CreateOr(OldByte, Mask);
      break;
    }
    if (NewByte)
      CGF.Builder.CreateStore(NewByte, ByteAddr);
  }

  // However we loaded the old byte, either by plain load or atomicrmw, shift
  // the bit into the low position and mask it to 0 or 1.
  Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
  return CGF.Builder.CreateAnd(
      ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
}
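
// Illustrative worked example (not in the upstream source): on a non-x86
// target, _bittestandset(base, 35) computes ByteIndex = 35 >> 3 = 4 and
// PosLow = 35 & 0x7 = 3, ORs the byte at base[4] with the mask 1 << 3, and
// returns (oldbyte >> 3) & 1, i.e. the previous value of bit 35.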

static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
                                                unsigned BuiltinID,
                                                const CallExpr *E) {
  Value *Addr = CGF.EmitScalarExpr(E->getArg(0));

  SmallString<64> Asm;
  raw_svector_ostream AsmOS(Asm);
  llvm::IntegerType *RetType = CGF.Int32Ty;

  switch (BuiltinID) {
  case clang::PPC::BI__builtin_ppc_ldarx:
    AsmOS << "ldarx ";
    RetType = CGF.Int64Ty;
    break;
  case clang::PPC::BI__builtin_ppc_lwarx:
    AsmOS << "lwarx ";
    RetType = CGF.Int32Ty;
    break;
  case clang::PPC::BI__builtin_ppc_lharx:
    AsmOS << "lharx ";
    RetType = CGF.Int16Ty;
    break;
  case clang::PPC::BI__builtin_ppc_lbarx:
    AsmOS << "lbarx ";
    RetType = CGF.Int8Ty;
    break;
  default:
    llvm_unreachable("Expected only PowerPC load reserve intrinsics");
  }

  AsmOS << "$0, ${1:y}";

  std::string Constraints = "=r,*Z,~{memory}";
  std::string MachineClobbers = CGF.getTarget().getClobbers();
  if (!MachineClobbers.empty()) {
    Constraints += ',';
    Constraints += MachineClobbers;
  }

  llvm::Type *IntPtrType = RetType->getPointerTo();
  llvm::FunctionType *FTy =
      llvm::FunctionType::get(RetType, {IntPtrType}, false);

  llvm::InlineAsm *IA =
      llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
  CI->addParamAttr(
      0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
  return CI;
}
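
// Illustrative example (not in the upstream source): for
//   int v = __builtin_ppc_lwarx(p);
// the helper emits the inline asm "lwarx $0, ${1:y}" with constraints
// "=r,*Z,~{memory}", returning the i32 loaded with a reservation; the
// matching store-conditional (__builtin_ppc_stwcx) is a separate builtin.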

namespace {
enum class MSVCSetJmpKind {
  _setjmpex,
  _setjmp3,
  _setjmp
};
}

/// MSVC handles setjmp a bit differently on different platforms. On every
/// architecture except 32-bit x86, the frame address is passed. On x86, extra
/// parameters can be passed as variadic arguments, but we always pass none.
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
                               const CallExpr *E) {
  llvm::Value *Arg1 = nullptr;
  llvm::Type *Arg1Ty = nullptr;
  StringRef Name;
  bool IsVarArg = false;
  if (SJKind == MSVCSetJmpKind::_setjmp3) {
    Name = "_setjmp3";
    Arg1Ty = CGF.Int32Ty;
    Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
    IsVarArg = true;
  } else {
    Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
    Arg1Ty = CGF.Int8PtrTy;
    if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
      Arg1 = CGF.Builder.CreateCall(
          CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
    } else
      Arg1 = CGF.Builder.CreateCall(
          CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
          llvm::ConstantInt::get(CGF.Int32Ty, 0));
  }

  // Mark the call site and declaration with ReturnsTwice.
  llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
      CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
      llvm::Attribute::ReturnsTwice);
  llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
      ReturnsTwiceAttr, /*Local=*/true);

  llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
      CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
  llvm::Value *Args[] = {Buf, Arg1};
  llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
  CB->setAttributes(ReturnsTwiceAttr);
  return RValue::get(CB);
}
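
// Illustrative note (not in the upstream source): on 32-bit x86 MSVC, _setjmp
// maps to a call to _setjmp3(buf, 0) (variadic; the extra arguments are left
// empty here); on other architectures _setjmp/_setjmpex receive the frame
// address (or, on AArch64, the result of @llvm.sponentry) as the second
// argument. Both the callee and the call site are marked returns_twice, as
// setjmp semantics require.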

// Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code,
// we handle them here.
enum class CodeGenFunction::MSVCIntrin {
  _BitScanForward,
  _BitScanReverse,
  _InterlockedAnd,
  _InterlockedDecrement,
  _InterlockedExchange,
  _InterlockedExchangeAdd,
  _InterlockedExchangeSub,
  _InterlockedIncrement,
  _InterlockedOr,
  _InterlockedXor,
  _InterlockedExchangeAdd_acq,
  _InterlockedExchangeAdd_rel,
  _InterlockedExchangeAdd_nf,
  _InterlockedExchange_acq,
  _InterlockedExchange_rel,
  _InterlockedExchange_nf,
  _InterlockedCompareExchange_acq,
  _InterlockedCompareExchange_rel,
  _InterlockedCompareExchange_nf,
  _InterlockedCompareExchange128,
  _InterlockedCompareExchange128_acq,
  _InterlockedCompareExchange128_rel,
  _InterlockedCompareExchange128_nf,
  _InterlockedOr_acq,
  _InterlockedOr_rel,
  _InterlockedOr_nf,
  _InterlockedXor_acq,
  _InterlockedXor_rel,
  _InterlockedXor_nf,
  _InterlockedAnd_acq,
  _InterlockedAnd_rel,
  _InterlockedAnd_nf,
  _InterlockedIncrement_acq,
  _InterlockedIncrement_rel,
  _InterlockedIncrement_nf,
  _InterlockedDecrement_acq,
  _InterlockedDecrement_rel,
  _InterlockedDecrement_nf,
  __fastfail,
};

static Optional<CodeGenFunction::MSVCIntrin>
translateArmToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return None;
  case clang::ARM::BI_BitScanForward:
  case clang::ARM::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::ARM::BI_BitScanReverse:
  case clang::ARM::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::ARM::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::ARM::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::ARM::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::ARM::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::ARM::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::ARM::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::ARM::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::ARM::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::ARM::BI_InterlockedExchangeAdd8_acq:
  case clang::ARM::BI_InterlockedExchangeAdd16_acq:
  case clang::ARM::BI_InterlockedExchangeAdd_acq:
  case clang::ARM::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::ARM::BI_InterlockedExchangeAdd8_rel:
  case clang::ARM::BI_InterlockedExchangeAdd16_rel:
  case clang::ARM::BI_InterlockedExchangeAdd_rel:
  case clang::ARM::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::ARM::BI_InterlockedExchangeAdd8_nf:
  case clang::ARM::BI_InterlockedExchangeAdd16_nf:
  case clang::ARM::BI_InterlockedExchangeAdd_nf:
  case clang::ARM::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::ARM::BI_InterlockedExchange8_acq:
  case clang::ARM::BI_InterlockedExchange16_acq:
  case clang::ARM::BI_InterlockedExchange_acq:
  case clang::ARM::BI_InterlockedExchange64_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::ARM::BI_InterlockedExchange8_rel:
  case clang::ARM::BI_InterlockedExchange16_rel:
  case clang::ARM::BI_InterlockedExchange_rel:
  case clang::ARM::BI_InterlockedExchange64_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::ARM::BI_InterlockedExchange8_nf:
  case clang::ARM::BI_InterlockedExchange16_nf:
  case clang::ARM::BI_InterlockedExchange_nf:
  case clang::ARM::BI_InterlockedExchange64_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::ARM::BI_InterlockedCompareExchange8_acq:
  case clang::ARM::BI_InterlockedCompareExchange16_acq:
  case clang::ARM::BI_InterlockedCompareExchange_acq:
  case clang::ARM::BI_InterlockedCompareExchange64_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::ARM::BI_InterlockedCompareExchange8_rel:
  case clang::ARM::BI_InterlockedCompareExchange16_rel:
  case clang::ARM::BI_InterlockedCompareExchange_rel:
  case clang::ARM::BI_InterlockedCompareExchange64_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::ARM::BI_InterlockedCompareExchange8_nf:
  case clang::ARM::BI_InterlockedCompareExchange16_nf:
  case clang::ARM::BI_InterlockedCompareExchange_nf:
  case clang::ARM::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::ARM::BI_InterlockedOr8_acq:
  case clang::ARM::BI_InterlockedOr16_acq:
  case clang::ARM::BI_InterlockedOr_acq:
  case clang::ARM::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::ARM::BI_InterlockedOr8_rel:
  case clang::ARM::BI_InterlockedOr16_rel:
  case clang::ARM::BI_InterlockedOr_rel:
  case clang::ARM::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::ARM::BI_InterlockedOr8_nf:
  case clang::ARM::BI_InterlockedOr16_nf:
  case clang::ARM::BI_InterlockedOr_nf:
  case clang::ARM::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::ARM::BI_InterlockedXor8_acq:
  case clang::ARM::BI_InterlockedXor16_acq:
  case clang::ARM::BI_InterlockedXor_acq:
  case clang::ARM::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::ARM::BI_InterlockedXor8_rel:
  case clang::ARM::BI_InterlockedXor16_rel:
  case clang::ARM::BI_InterlockedXor_rel:
  case clang::ARM::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::ARM::BI_InterlockedXor8_nf:
  case clang::ARM::BI_InterlockedXor16_nf:
  case clang::ARM::BI_InterlockedXor_nf:
  case clang::ARM::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::ARM::BI_InterlockedAnd8_acq:
  case clang::ARM::BI_InterlockedAnd16_acq:
  case clang::ARM::BI_InterlockedAnd_acq:
  case clang::ARM::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::ARM::BI_InterlockedAnd8_rel:
  case clang::ARM::BI_InterlockedAnd16_rel:
  case clang::ARM::BI_InterlockedAnd_rel:
  case clang::ARM::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::ARM::BI_InterlockedAnd8_nf:
  case clang::ARM::BI_InterlockedAnd16_nf:
  case clang::ARM::BI_InterlockedAnd_nf:
  case clang::ARM::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::ARM::BI_InterlockedIncrement16_acq:
  case clang::ARM::BI_InterlockedIncrement_acq:
  case clang::ARM::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::ARM::BI_InterlockedIncrement16_rel:
  case clang::ARM::BI_InterlockedIncrement_rel:
  case clang::ARM::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::ARM::BI_InterlockedIncrement16_nf:
  case clang::ARM::BI_InterlockedIncrement_nf:
  case clang::ARM::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::ARM::BI_InterlockedDecrement16_acq:
  case clang::ARM::BI_InterlockedDecrement_acq:
  case clang::ARM::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::ARM::BI_InterlockedDecrement16_rel:
  case clang::ARM::BI_InterlockedDecrement_rel:
  case clang::ARM::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::ARM::BI_InterlockedDecrement16_nf:
  case clang::ARM::BI_InterlockedDecrement_nf:
  case clang::ARM::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}
1312
1314translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1315 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1316 switch (BuiltinID) {
1317 default:
1318 return None;
1319 case clang::AArch64::BI_BitScanForward:
1320 case clang::AArch64::BI_BitScanForward64:
1321 return MSVCIntrin::_BitScanForward;
1322 case clang::AArch64::BI_BitScanReverse:
1323 case clang::AArch64::BI_BitScanReverse64:
1324 return MSVCIntrin::_BitScanReverse;
1325 case clang::AArch64::BI_InterlockedAnd64:
1326 return MSVCIntrin::_InterlockedAnd;
1327 case clang::AArch64::BI_InterlockedExchange64:
1328 return MSVCIntrin::_InterlockedExchange;
1329 case clang::AArch64::BI_InterlockedExchangeAdd64:
1330 return MSVCIntrin::_InterlockedExchangeAdd;
1331 case clang::AArch64::BI_InterlockedExchangeSub64:
1332 return MSVCIntrin::_InterlockedExchangeSub;
1333 case clang::AArch64::BI_InterlockedOr64:
1334 return MSVCIntrin::_InterlockedOr;
1335 case clang::AArch64::BI_InterlockedXor64:
1336 return MSVCIntrin::_InterlockedXor;
1337 case clang::AArch64::BI_InterlockedDecrement64:
1338 return MSVCIntrin::_InterlockedDecrement;
1339 case clang::AArch64::BI_InterlockedIncrement64:
1340 return MSVCIntrin::_InterlockedIncrement;
1341 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1342 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1343 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1344 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1345 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1346 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1347 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1348 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1349 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1350 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1351 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1352 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1353 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1354 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1355 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1356 case clang::AArch64::BI_InterlockedExchange8_acq:
1357 case clang::AArch64::BI_InterlockedExchange16_acq:
1358 case clang::AArch64::BI_InterlockedExchange_acq:
1359 case clang::AArch64::BI_InterlockedExchange64_acq:
1360 return MSVCIntrin::_InterlockedExchange_acq;
1361 case clang::AArch64::BI_InterlockedExchange8_rel:
1362 case clang::AArch64::BI_InterlockedExchange16_rel:
1363 case clang::AArch64::BI_InterlockedExchange_rel:
1364 case clang::AArch64::BI_InterlockedExchange64_rel:
1365 return MSVCIntrin::_InterlockedExchange_rel;
1366 case clang::AArch64::BI_InterlockedExchange8_nf:
1367 case clang::AArch64::BI_InterlockedExchange16_nf:
1368 case clang::AArch64::BI_InterlockedExchange_nf:
1369 case clang::AArch64::BI_InterlockedExchange64_nf:
1370 return MSVCIntrin::_InterlockedExchange_nf;
1371 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1372 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1373 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1374 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1375 return MSVCIntrin::_InterlockedCompareExchange_acq;
1376 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1377 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1378 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1379 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1380 return MSVCIntrin::_InterlockedCompareExchange_rel;
1381 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1382 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1383 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1384 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1385 return MSVCIntrin::_InterlockedCompareExchange_nf;
1386 case clang::AArch64::BI_InterlockedCompareExchange128:
1387 return MSVCIntrin::_InterlockedCompareExchange128;
1388 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1389 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1390 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1391 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1392 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1393 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1394 case clang::AArch64::BI_InterlockedOr8_acq:
1395 case clang::AArch64::BI_InterlockedOr16_acq:
1396 case clang::AArch64::BI_InterlockedOr_acq:
1397 case clang::AArch64::BI_InterlockedOr64_acq:
1398 return MSVCIntrin::_InterlockedOr_acq;
1399 case clang::AArch64::BI_InterlockedOr8_rel:
1400 case clang::AArch64::BI_InterlockedOr16_rel:
1401 case clang::AArch64::BI_InterlockedOr_rel:
1402 case clang::AArch64::BI_InterlockedOr64_rel:
1403 return MSVCIntrin::_InterlockedOr_rel;
1404 case clang::AArch64::BI_InterlockedOr8_nf:
1405 case clang::AArch64::BI_InterlockedOr16_nf:
1406 case clang::AArch64::BI_InterlockedOr_nf:
1407 case clang::AArch64::BI_InterlockedOr64_nf:
1408 return MSVCIntrin::_InterlockedOr_nf;
1409 case clang::AArch64::BI_InterlockedXor8_acq:
1410 case clang::AArch64::BI_InterlockedXor16_acq:
1411 case clang::AArch64::BI_InterlockedXor_acq:
1412 case clang::AArch64::BI_InterlockedXor64_acq:
1413 return MSVCIntrin::_InterlockedXor_acq;
1414 case clang::AArch64::BI_InterlockedXor8_rel:
1415 case clang::AArch64::BI_InterlockedXor16_rel:
1416 case clang::AArch64::BI_InterlockedXor_rel:
1417 case clang::AArch64::BI_InterlockedXor64_rel:
1418 return MSVCIntrin::_InterlockedXor_rel;
1419 case clang::AArch64::BI_InterlockedXor8_nf:
1420 case clang::AArch64::BI_InterlockedXor16_nf:
1421 case clang::AArch64::BI_InterlockedXor_nf:
1422 case clang::AArch64::BI_InterlockedXor64_nf:
1423 return MSVCIntrin::_InterlockedXor_nf;
1424 case clang::AArch64::BI_InterlockedAnd8_acq:
1425 case clang::AArch64::BI_InterlockedAnd16_acq:
1426 case clang::AArch64::BI_InterlockedAnd_acq:
1427 case clang::AArch64::BI_InterlockedAnd64_acq:
1428 return MSVCIntrin::_InterlockedAnd_acq;
1429 case clang::AArch64::BI_InterlockedAnd8_rel:
1430 case clang::AArch64::BI_InterlockedAnd16_rel:
1431 case clang::AArch64::BI_InterlockedAnd_rel:
1432 case clang::AArch64::BI_InterlockedAnd64_rel:
1433 return MSVCIntrin::_InterlockedAnd_rel;
1434 case clang::AArch64::BI_InterlockedAnd8_nf:
1435 case clang::AArch64::BI_InterlockedAnd16_nf:
1436 case clang::AArch64::BI_InterlockedAnd_nf:
1437 case clang::AArch64::BI_InterlockedAnd64_nf:
1438 return MSVCIntrin::_InterlockedAnd_nf;
1439 case clang::AArch64::BI_InterlockedIncrement16_acq:
1440 case clang::AArch64::BI_InterlockedIncrement_acq:
1441 case clang::AArch64::BI_InterlockedIncrement64_acq:
1442 return MSVCIntrin::_InterlockedIncrement_acq;
1443 case clang::AArch64::BI_InterlockedIncrement16_rel:
1444 case clang::AArch64::BI_InterlockedIncrement_rel:
1445 case clang::AArch64::BI_InterlockedIncrement64_rel:
1446 return MSVCIntrin::_InterlockedIncrement_rel;
1447 case clang::AArch64::BI_InterlockedIncrement16_nf:
1448 case clang::AArch64::BI_InterlockedIncrement_nf:
1449 case clang::AArch64::BI_InterlockedIncrement64_nf:
1450 return MSVCIntrin::_InterlockedIncrement_nf;
1451 case clang::AArch64::BI_InterlockedDecrement16_acq:
1452 case clang::AArch64::BI_InterlockedDecrement_acq:
1453 case clang::AArch64::BI_InterlockedDecrement64_acq:
1454 return MSVCIntrin::_InterlockedDecrement_acq;
1455 case clang::AArch64::BI_InterlockedDecrement16_rel:
1456 case clang::AArch64::BI_InterlockedDecrement_rel:
1457 case clang::AArch64::BI_InterlockedDecrement64_rel:
1458 return MSVCIntrin::_InterlockedDecrement_rel;
1459 case clang::AArch64::BI_InterlockedDecrement16_nf:
1460 case clang::AArch64::BI_InterlockedDecrement_nf:
1461 case clang::AArch64::BI_InterlockedDecrement64_nf:
1462 return MSVCIntrin::_InterlockedDecrement_nf;
1463 }
1464 llvm_unreachable("must return from switch");
1465}
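// Note (illustrative, not part of the original file): the _acq/_rel/_nf
// suffixes in the table above select the LLVM atomic orderings Acquire,
// Release, and Monotonic ("no fence"); the unsuffixed intrinsics are
// sequentially consistent. For example (hypothetical values):
//   long Flags = 0;
//   long Old = _InterlockedOr_acq(&Flags, 1); // atomicrmw or ... acquire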
1466
1467static Optional<CodeGenFunction::MSVCIntrin>
1468translateX86ToMsvcIntrin(unsigned BuiltinID) {
1469 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1470 switch (BuiltinID) {
1471 default:
1472 return None;
1473 case clang::X86::BI_BitScanForward:
1474 case clang::X86::BI_BitScanForward64:
1475 return MSVCIntrin::_BitScanForward;
1476 case clang::X86::BI_BitScanReverse:
1477 case clang::X86::BI_BitScanReverse64:
1478 return MSVCIntrin::_BitScanReverse;
1479 case clang::X86::BI_InterlockedAnd64:
1480 return MSVCIntrin::_InterlockedAnd;
1481 case clang::X86::BI_InterlockedCompareExchange128:
1482 return MSVCIntrin::_InterlockedCompareExchange128;
1483 case clang::X86::BI_InterlockedExchange64:
1484 return MSVCIntrin::_InterlockedExchange;
1485 case clang::X86::BI_InterlockedExchangeAdd64:
1486 return MSVCIntrin::_InterlockedExchangeAdd;
1487 case clang::X86::BI_InterlockedExchangeSub64:
1488 return MSVCIntrin::_InterlockedExchangeSub;
1489 case clang::X86::BI_InterlockedOr64:
1490 return MSVCIntrin::_InterlockedOr;
1491 case clang::X86::BI_InterlockedXor64:
1492 return MSVCIntrin::_InterlockedXor;
1493 case clang::X86::BI_InterlockedDecrement64:
1494 return MSVCIntrin::_InterlockedDecrement;
1495 case clang::X86::BI_InterlockedIncrement64:
1496 return MSVCIntrin::_InterlockedIncrement;
1497 }
1498 llvm_unreachable("must return from switch");
1499}
1500
1501// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
1502Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
1503 const CallExpr *E) {
1504 switch (BuiltinID) {
1505 case MSVCIntrin::_BitScanForward:
1506 case MSVCIntrin::_BitScanReverse: {
1507 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
1508 Value *ArgValue = EmitScalarExpr(E->getArg(1));
1509
1510 llvm::Type *ArgType = ArgValue->getType();
1511 llvm::Type *IndexType = IndexAddress.getElementType();
1512 llvm::Type *ResultType = ConvertType(E->getType());
1513
1514 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1515 Value *ResZero = llvm::Constant::getNullValue(ResultType);
1516 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
1517
1518 BasicBlock *Begin = Builder.GetInsertBlock();
1519 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
1520 Builder.SetInsertPoint(End);
1521 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
1522
1523 Builder.SetInsertPoint(Begin);
1524 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
1525 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
1526 Builder.CreateCondBr(IsZero, End, NotZero);
1527 Result->addIncoming(ResZero, Begin);
1528
1529 Builder.SetInsertPoint(NotZero);
1530
1531 if (BuiltinID == MSVCIntrin::_BitScanForward) {
1532 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1533 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1534 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1535 Builder.CreateStore(ZeroCount, IndexAddress, false);
1536 } else {
1537 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1538 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
1539
1540 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1541 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1542 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1543 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
1544 Builder.CreateStore(Index, IndexAddress, false);
1545 }
1546 Builder.CreateBr(End);
1547 Result->addIncoming(ResOne, NotZero);
1548
1549 Builder.SetInsertPoint(End);
1550 return Result;
1551 }
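// A minimal sketch (not from this file) of the semantics the block above
// emits, phrased with the GCC-style builtins; the 32-bit width is an
// assumption for illustration:
//   unsigned char BitScanForward(unsigned *Index, unsigned Mask) {
//     if (Mask == 0) return 0;           // PHI yields 0 on the zero path
//     *Index = __builtin_ctz(Mask);      // cttz: index of lowest set bit
//     return 1;
//   }
//   unsigned char BitScanReverse(unsigned *Index, unsigned Mask) {
//     if (Mask == 0) return 0;
//     *Index = 31 - __builtin_clz(Mask); // (width - 1) - ctlz: highest bit
//     return 1;
//   }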
1552 case MSVCIntrin::_InterlockedAnd:
1553 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
1554 case MSVCIntrin::_InterlockedExchange:
1555 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
1556 case MSVCIntrin::_InterlockedExchangeAdd:
1557 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
1558 case MSVCIntrin::_InterlockedExchangeSub:
1559 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
1560 case MSVCIntrin::_InterlockedOr:
1561 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
1562 case MSVCIntrin::_InterlockedXor:
1563 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
1564 case MSVCIntrin::_InterlockedExchangeAdd_acq:
1565 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1566 AtomicOrdering::Acquire);
1567 case MSVCIntrin::_InterlockedExchangeAdd_rel:
1568 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1569 AtomicOrdering::Release);
1570 case MSVCIntrin::_InterlockedExchangeAdd_nf:
1571 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1572 AtomicOrdering::Monotonic);
1573 case MSVCIntrin::_InterlockedExchange_acq:
1574 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1575 AtomicOrdering::Acquire);
1576 case MSVCIntrin::_InterlockedExchange_rel:
1577 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1578 AtomicOrdering::Release);
1579 case MSVCIntrin::_InterlockedExchange_nf:
1580 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1581 AtomicOrdering::Monotonic);
1582 case MSVCIntrin::_InterlockedCompareExchange_acq:
1583 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
1584 case MSVCIntrin::_InterlockedCompareExchange_rel:
1585 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
1586 case MSVCIntrin::_InterlockedCompareExchange_nf:
1587 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1588 case MSVCIntrin::_InterlockedCompareExchange128:
1589 return EmitAtomicCmpXchg128ForMSIntrin(
1590 *this, E, AtomicOrdering::SequentiallyConsistent);
1591 case MSVCIntrin::_InterlockedCompareExchange128_acq:
1592 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
1593 case MSVCIntrin::_InterlockedCompareExchange128_rel:
1594 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
1595 case MSVCIntrin::_InterlockedCompareExchange128_nf:
1596 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1597 case MSVCIntrin::_InterlockedOr_acq:
1598 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1599 AtomicOrdering::Acquire);
1600 case MSVCIntrin::_InterlockedOr_rel:
1601 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1602 AtomicOrdering::Release);
1603 case MSVCIntrin::_InterlockedOr_nf:
1604 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1605 AtomicOrdering::Monotonic);
1606 case MSVCIntrin::_InterlockedXor_acq:
1607 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1608 AtomicOrdering::Acquire);
1609 case MSVCIntrin::_InterlockedXor_rel:
1610 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1611 AtomicOrdering::Release);
1612 case MSVCIntrin::_InterlockedXor_nf:
1613 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1614 AtomicOrdering::Monotonic);
1615 case MSVCIntrin::_InterlockedAnd_acq:
1616 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1617 AtomicOrdering::Acquire);
1618 case MSVCIntrin::_InterlockedAnd_rel:
1619 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1620 AtomicOrdering::Release);
1621 case MSVCIntrin::_InterlockedAnd_nf:
1622 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1623 AtomicOrdering::Monotonic);
1624 case MSVCIntrin::_InterlockedIncrement_acq:
1625 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
1626 case MSVCIntrin::_InterlockedIncrement_rel:
1627 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
1628 case MSVCIntrin::_InterlockedIncrement_nf:
1629 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
1630 case MSVCIntrin::_InterlockedDecrement_acq:
1631 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
1632 case MSVCIntrin::_InterlockedDecrement_rel:
1633 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
1634 case MSVCIntrin::_InterlockedDecrement_nf:
1635 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
1636
1637 case MSVCIntrin::_InterlockedDecrement:
1638 return EmitAtomicDecrementValue(*this, E);
1639 case MSVCIntrin::_InterlockedIncrement:
1640 return EmitAtomicIncrementValue(*this, E);
1641
1642 case MSVCIntrin::__fastfail: {
1643 // Request immediate process termination from the kernel. The instruction
1644 // sequences to do this are documented on MSDN:
1645 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
1646 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
1647 StringRef Asm, Constraints;
1648 switch (ISA) {
1649 default:
1650 ErrorUnsupported(E, "__fastfail call for this architecture");
1651 break;
1652 case llvm::Triple::x86:
1653 case llvm::Triple::x86_64:
1654 Asm = "int $$0x29";
1655 Constraints = "{cx}";
1656 break;
1657 case llvm::Triple::thumb:
1658 Asm = "udf #251";
1659 Constraints = "{r0}";
1660 break;
1661 case llvm::Triple::aarch64:
1662 Asm = "brk #0xF003";
1663 Constraints = "{w0}";
1664 }
1665 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
1666 llvm::InlineAsm *IA =
1667 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1668 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
1669 getLLVMContext(), llvm::AttributeList::FunctionIndex,
1670 llvm::Attribute::NoReturn);
1671 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
1672 CI->setAttributes(NoReturnAttr);
1673 return CI;
1674 }
1675 }
1676 llvm_unreachable("Incorrect MSVC intrinsic!");
1677}
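// Usage sketch for the __fastfail lowering above; the fail-fast code value
// is an assumption for illustration. The code travels in the first GPR
// ({cx}, {r0}, or {w0}) and the call never returns:
//   __fastfail(7); // e.g. FAST_FAIL_FATAL_APP_EXIT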
1678
1679namespace {
1680// ARC cleanup for __builtin_os_log_format
1681struct CallObjCArcUse final : EHScopeStack::Cleanup {
1682 CallObjCArcUse(llvm::Value *object) : object(object) {}
1683 llvm::Value *object;
1684
1685 void Emit(CodeGenFunction &CGF, Flags flags) override {
1686 CGF.EmitARCIntrinsicUse(object);
1687 }
1688};
1689}
1690
1691Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
1692 BuiltinCheckKind Kind) {
1693 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
1694 && "Unsupported builtin check kind");
1695
1696 Value *ArgValue = EmitScalarExpr(E);
1697 if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
1698 return ArgValue;
1699
1700 SanitizerScope SanScope(this);
1701 Value *Cond = Builder.CreateICmpNE(
1702 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
1703 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
1704 SanitizerHandler::InvalidBuiltin,
1705 {EmitCheckSourceLocation(E->getExprLoc()),
1706 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
1707 None);
1708 return ArgValue;
1709}
1710
1711/// Get the argument type for arguments to os_log_helper.
1712static CanQualType getOSLogArgType(ASTContext &C, int Size) {
1713 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
1714 return C.getCanonicalType(UnsignedTy);
1715}
1716
1717llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
1718 const analyze_os_log::OSLogBufferLayout &Layout,
1719 CharUnits BufferAlignment) {
1720 ASTContext &Ctx = getContext();
1721
1722 llvm::SmallString<64> Name;
1723 {
1724 raw_svector_ostream OS(Name);
1725 OS << "__os_log_helper";
1726 OS << "_" << BufferAlignment.getQuantity();
1727 OS << "_" << int(Layout.getSummaryByte());
1728 OS << "_" << int(Layout.getNumArgsByte());
1729 for (const auto &Item : Layout.Items)
1730 OS << "_" << int(Item.getSizeByte()) << "_"
1731 << int(Item.getDescriptorByte());
1732 }
1733
1734 if (llvm::Function *F = CGM.getModule().getFunction(Name))
1735 return F;
1736
1737 llvm::SmallVector<QualType, 4> ArgTys;
1738 FunctionArgList Args;
1739 Args.push_back(ImplicitParamDecl::Create(
1740 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
1741 ImplicitParamDecl::Other));
1742 ArgTys.emplace_back(Ctx.VoidPtrTy);
1743
1744 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
1745 char Size = Layout.Items[I].getSizeByte();
1746 if (!Size)
1747 continue;
1748
1749 QualType ArgTy = getOSLogArgType(Ctx, Size);
1750 Args.push_back(ImplicitParamDecl::Create(
1751 Ctx, nullptr, SourceLocation(),
1752 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
1753 ImplicitParamDecl::Other));
1754 ArgTys.emplace_back(ArgTy);
1755 }
1756
1757 QualType ReturnTy = Ctx.VoidTy;
1758
1759 // The helper function has linkonce_odr linkage to enable the linker to merge
1760 // identical functions. To ensure the merging always happens, 'noinline' is
1761 // attached to the function when compiling with -Oz.
1762 const CGFunctionInfo &FI =
1763 CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
1764 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
1765 llvm::Function *Fn = llvm::Function::Create(
1766 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
1767 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
1768 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
1769 CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
1770 Fn->setDoesNotThrow();
1771
1772 // Attach 'noinline' at -Oz.
1773 if (CGM.getCodeGenOpts().OptimizeSize == 2)
1774 Fn->addFnAttr(llvm::Attribute::NoInline);
1775
1776 auto NL = ApplyDebugLocation::CreateEmpty(*this);
1777 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
1778
1779 // Create a scope with an artificial location for the body of this function.
1780 auto AL = ApplyDebugLocation::CreateArtificial(*this);
1781
1782 CharUnits Offset;
1783 Address BufAddr =
1784 Address(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Int8Ty,
1785 BufferAlignment);
1786 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
1787 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
1788 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
1789 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
1790
1791 unsigned I = 1;
1792 for (const auto &Item : Layout.Items) {
1793 Builder.CreateStore(
1794 Builder.getInt8(Item.getDescriptorByte()),
1795 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
1796 Builder.CreateStore(
1797 Builder.getInt8(Item.getSizeByte()),
1798 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
1799
1800 CharUnits Size = Item.size();
1801 if (!Size.getQuantity())
1802 continue;
1803
1804 Address Arg = GetAddrOfLocalVar(Args[I]);
1805 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
1806 Addr =
1807 Builder.CreateElementBitCast(Addr, Arg.getElementType(), "argDataCast");
1808 Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
1809 Offset += Size;
1810 ++I;
1811 }
1812
1813 FinishFunction();
1814
1815 return Fn;
1816}
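// Naming sketch (hypothetical layout): a buffer with 8-byte alignment,
// summary byte 0, and two arguments of 4 and 8 bytes (descriptor bytes 0)
// yields a helper named "__os_log_helper_8_0_2_4_0_8_0", so identical
// layouts collapse to one linkonce_odr definition at link time.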
1817
1818RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
1819 assert(E.getNumArgs() >= 2 &&
1820 "__builtin_os_log_format takes at least 2 arguments");
1821 ASTContext &Ctx = getContext();
1822 analyze_os_log::OSLogBufferLayout Layout;
1823 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
1824 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
1825 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
1826
1827 // Ignore argument 1, the format string. It is not currently used.
1828 CallArgList Args;
1829 Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
1830
1831 for (const auto &Item : Layout.Items) {
1832 int Size = Item.getSizeByte();
1833 if (!Size)
1834 continue;
1835
1836 llvm::Value *ArgVal;
1837
1838 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
1839 uint64_t Val = 0;
1840 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
1841 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
1842 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
1843 } else if (const Expr *TheExpr = Item.getExpr()) {
1844 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
1845
1846 // If a temporary object that requires destruction after the full
1847 // expression is passed, push a lifetime-extended cleanup to extend its
1848 // lifetime to the end of the enclosing block scope.
1849 auto LifetimeExtendObject = [&](const Expr *E) {
1850 E = E->IgnoreParenCasts();
1851 // Extend lifetimes of objects returned by function calls and message
1852 // sends.
1853
1854 // FIXME: We should do this in other cases in which temporaries are
1855 // created including arguments of non-ARC types (e.g., C++
1856 // temporaries).
1857 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
1858 return true;
1859 return false;
1860 };
1861
1862 if (TheExpr->getType()->isObjCRetainableType() &&
1863 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
1864 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
1865 "Only scalar can be a ObjC retainable type");
1866 if (!isa<Constant>(ArgVal)) {
1867 CleanupKind Cleanup = getARCCleanupKind();
1868 QualType Ty = TheExpr->getType();
1869 Address Alloca = Address::invalid();
1870 Address Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
1871 ArgVal = EmitARCRetain(Ty, ArgVal);
1872 Builder.CreateStore(ArgVal, Addr);
1873 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
1874 CodeGenFunction::destroyARCStrongPrecise,
1875 Cleanup & EHCleanup);
1876
1877 // Push a clang.arc.use call to ensure ARC optimizer knows that the
1878 // argument has to be alive.
1879 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
1880 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
1881 }
1882 }
1883 } else {
1884 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
1885 }
1886
1887 unsigned ArgValSize =
1888 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
1889 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
1890 ArgValSize);
1891 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
1892 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
1893 // If ArgVal has type x86_fp80, zero-extend ArgVal.
1894 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
1895 Args.add(RValue::get(ArgVal), ArgTy);
1896 }
1897
1898 const CGFunctionInfo &FI =
1899 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
1900 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
1901 Layout, BufAddr.getAlignment());
1902 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
1903 return RValue::get(BufAddr.getPointer());
1904}
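// Buffer layout sketch (illustrative): for __builtin_os_log_format(buf,
// "x=%d", 42) the helper emitted above stores, at successive offsets,
//   [0] summary byte, [1] argument count (1),
//   [2] argument descriptor, [3] argument size (4), [4..7] the value 42,
// and the builtin itself returns the buffer pointer.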
1905
1906static bool isSpecialUnsignedMultiplySignedResult(
1907 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
1908 WidthAndSignedness ResultInfo) {
1909 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
1910 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
1911 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
1912}
1913
1914static RValue EmitCheckedUnsignedMultiplySignedResult(
1915 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
1916 const clang::Expr *Op2, WidthAndSignedness Op2Info,
1917 const clang::Expr *ResultArg, QualType ResultQTy,
1918 WidthAndSignedness ResultInfo) {
1919 assert(isSpecialUnsignedMultiplySignedResult(
1920 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
1921 "Cannot specialize this multiply");
1922
1923 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
1924 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
1925
1926 llvm::Value *HasOverflow;
1927 llvm::Value *Result = EmitOverflowIntrinsic(
1928 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
1929
1930 // The intrinsic call will detect overflow when the value is > UINT_MAX;
1931 // however, since the original builtin had a signed result, we need to report
1932 // an overflow when the result is greater than INT_MAX.
1933 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
1934 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
1935
1936 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
1937 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
1938
1939 bool isVolatile =
1940 ResultArg->getType()->getPointeeType().isVolatileQualified();
1941 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
1942 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
1943 isVolatile);
1944 return RValue::get(HasOverflow);
1945}
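// A portable sketch of the case handled above, assuming 32-bit int: both
// operands unsigned, the result signed, all of equal width.
static bool sketchUnsignedMulSignedResult(unsigned A, unsigned B, int *R) {
  unsigned Prod;
  // Overflow if the unsigned multiply wraps, or if the product fits in
  // unsigned range but exceeds INT_MAX (0x7fffffff).
  bool Ovf = __builtin_mul_overflow(A, B, &Prod) || Prod > 0x7fffffffu;
  *R = (int)Prod; // stored unconditionally, matching the IR above
  return Ovf;
}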
1946
1947/// Determine if a binop is a checked mixed-sign multiply we can specialize.
1948static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
1949 WidthAndSignedness Op1Info,
1950 WidthAndSignedness Op2Info,
1951 WidthAndSignedness ResultInfo) {
1952 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
1953 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
1954 Op1Info.Signed != Op2Info.Signed;
1955}
1956
1957/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
1958/// the generic checked-binop irgen.
1959static RValue
1960EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
1961 WidthAndSignedness Op1Info, const clang::Expr *Op2,
1962 WidthAndSignedness Op2Info,
1963 const clang::Expr *ResultArg, QualType ResultQTy,
1964 WidthAndSignedness ResultInfo) {
1965 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
1966 Op2Info, ResultInfo) &&
1967 "Not a mixed-sign multipliction we can specialize");
1968
1969 // Emit the signed and unsigned operands.
1970 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
1971 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
1972 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
1973 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
1974 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
1975 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
1976
1977 // One of the operands may be smaller than the other. If so, [s|z]ext it.
1978 if (SignedOpWidth < UnsignedOpWidth)
1979 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
1980 if (UnsignedOpWidth < SignedOpWidth)
1981 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
1982
1983 llvm::Type *OpTy = Signed->getType();
1984 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
1985 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
1986 llvm::Type *ResTy = ResultPtr.getElementType();
1987 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
1988
1989 // Take the absolute value of the signed operand.
1990 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
1991 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
1992 llvm::Value *AbsSigned =
1993 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
1994
1995 // Perform a checked unsigned multiplication.
1996 llvm::Value *UnsignedOverflow;
1997 llvm::Value *UnsignedResult =
1998 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
1999 Unsigned, UnsignedOverflow);
2000
2001 llvm::Value *Overflow, *Result;
2002 if (ResultInfo.Signed) {
2003 // Signed overflow occurs if the result is greater than INT_MAX or less
2004 // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
2005 auto IntMax =
2006 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2007 llvm::Value *MaxResult =
2008 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2009 CGF.Builder.CreateZExt(IsNegative, OpTy));
2010 llvm::Value *SignedOverflow =
2011 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2012 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2013
2014 // Prepare the signed result (possibly by negating it).
2015 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2016 llvm::Value *SignedResult =
2017 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2018 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2019 } else {
2020 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2021 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2022 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2023 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2024 if (ResultInfo.Width < OpWidth) {
2025 auto IntMax =
2026 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2027 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2028 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2029 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2030 }
2031
2032 // Negate the product if it would be negative in infinite precision.
2033 Result = CGF.Builder.CreateSelect(
2034 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2035
2036 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2037 }
2038 assert(Overflow && Result && "Missing overflow or result");
2039
2040 bool isVolatile =
2041 ResultArg->getType()->getPointeeType().isVolatileQualified();
2042 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2043 isVolatile);
2044 return RValue::get(Overflow);
2045}
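// A portable sketch of the unsigned-result path above, assuming equal
// 32-bit operand and result widths (the signed-result path differs only
// in the overflow bound).
static bool sketchMixedSignMul(int S, unsigned U, unsigned *R) {
  unsigned AbsS = S < 0 ? 0u - (unsigned)S : (unsigned)S;
  bool Ovf = __builtin_mul_overflow(AbsS, U, R); // checked |S| * U
  Ovf |= (S < 0) && *R != 0; // any nonzero negative product underflows
  if (S < 0)
    *R = 0u - *R; // two's-complement negate, as CreateNeg does
  return Ovf;
}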
2046
2047static bool
2048TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2049 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2050 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2051 Ty = Ctx.getBaseElementType(Arr);
2052
2053 const auto *Record = Ty->getAsCXXRecordDecl();
2054 if (!Record)
2055 return false;
2056
2057 // We've already checked this type, or are in the process of checking it.
2058 if (!Seen.insert(Record).second)
2059 return false;
2060
2061 assert(Record->hasDefinition() &&
2062 "Incomplete types should already be diagnosed");
2063
2064 if (Record->isDynamicClass())
2065 return true;
2066
2067 for (FieldDecl *F : Record->fields()) {
2068 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2069 return true;
2070 }
2071 return false;
2072}
2073
2074/// Determine if the specified type requires laundering by checking if it is a
2075/// dynamic class type or contains a subobject which is a dynamic class type.
2076static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2077 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2078 return false;
2079 llvm::SmallPtrSet<const Decl *, 16> Seen;
2080 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2081}
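// Laundering examples (sketch): with -fstrict-vtable-pointers,
//   struct S { virtual void f(); }; // dynamic class: requires laundering
//   struct T { S Elems[4]; };       // contains one (via array): requires it
//   struct U { int X; };            // no vptr anywhere: does not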
2082
2083RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2084 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2085 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2086
2087 // The builtin's shift arg may have a different type than the source arg and
2088 // result, but the LLVM intrinsic uses the same type for all values.
2089 llvm::Type *Ty = Src->getType();
2090 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2091
2092 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2093 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2094 Function *F = CGM.getIntrinsic(IID, Ty);
2095 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2096}
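// Funnel-shift identity used above: rotl(x, n) == fshl(x, x, n). A plain
// C++ sketch of the 32-bit case (the width here is an assumption):
static unsigned sketchRotl32(unsigned X, unsigned N) {
  N &= 31; // fshl masks the shift amount by the bit width
  return N ? (X << N) | (X >> (32 - N)) : X; // guard the UB shift by 32
}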
2097
2098// Map math builtins for long-double to f128 version.
2099static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2100 switch (BuiltinID) {
2101#define MUTATE_LDBL(func) \
2102 case Builtin::BI__builtin_##func##l: \
2103 return Builtin::BI__builtin_##func##f128;
2134 MUTATE_LDBL(nans)
2135 MUTATE_LDBL(inf)
2154 MUTATE_LDBL(huge_val)
2164#undef MUTATE_LDBL
2165 default:
2166 return BuiltinID;
2167 }
2168}
2169
2170RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2171 const CallExpr *E,
2172 ReturnValueSlot ReturnValue) {
2173 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2174 // See if we can constant fold this builtin. If so, don't emit it at all.
2175 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2176 Expr::EvalResult Result;
2177 if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2178 !Result.hasSideEffects()) {
2179 if (Result.Val.isInt())
2180 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2181 Result.Val.getInt()));
2182 if (Result.Val.isFloat())
2183 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2184 Result.Val.getFloat()));
2185 }
2186
2187 // If the target's long double semantics are IEEE 128-bit, replace math
2188 // builtins of long double with their f128 equivalents.
2189 // TODO: This mutation should also be applied to targets other than PPC,
2190 // once the backend supports IEEE 128-bit style libcalls.
2191 if (getTarget().getTriple().isPPC64() &&
2192 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2193 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2194
2195 // If the builtin has been declared explicitly with an assembler label,
2196 // disable the specialized emitting below. Ideally we should communicate the
2197 // rename in IR, or at least avoid generating the intrinsic calls that are
2198 // likely to get lowered to the renamed library functions.
2199 const unsigned BuiltinIDIfNoAsmLabel =
2200 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2201
2202 // There are LLVM math intrinsics/instructions corresponding to math library
2203 // functions except the LLVM op will never set errno while the math library
2204 // might. Also, math builtins have the same semantics as their math library
2205 // twins. Thus, we can transform math library and builtin calls to their
2206 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2207 if (FD->hasAttr<ConstAttr>()) {
2208 switch (BuiltinIDIfNoAsmLabel) {
2209 case Builtin::BIceil:
2210 case Builtin::BIceilf:
2211 case Builtin::BIceill:
2212 case Builtin::BI__builtin_ceil:
2213 case Builtin::BI__builtin_ceilf:
2214 case Builtin::BI__builtin_ceilf16:
2215 case Builtin::BI__builtin_ceill:
2216 case Builtin::BI__builtin_ceilf128:
2217 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2218 Intrinsic::ceil,
2219 Intrinsic::experimental_constrained_ceil));
2220
2221 case Builtin::BIcopysign:
2222 case Builtin::BIcopysignf:
2223 case Builtin::BIcopysignl:
2224 case Builtin::BI__builtin_copysign:
2225 case Builtin::BI__builtin_copysignf:
2226 case Builtin::BI__builtin_copysignf16:
2227 case Builtin::BI__builtin_copysignl:
2228 case Builtin::BI__builtin_copysignf128:
2229 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
2230
2231 case Builtin::BIcos:
2232 case Builtin::BIcosf:
2233 case Builtin::BIcosl:
2234 case Builtin::BI__builtin_cos:
2235 case Builtin::BI__builtin_cosf:
2236 case Builtin::BI__builtin_cosf16:
2237 case Builtin::BI__builtin_cosl:
2238 case Builtin::BI__builtin_cosf128:
2239 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2240 Intrinsic::cos,
2241 Intrinsic::experimental_constrained_cos));
2242
2243 case Builtin::BIexp:
2244 case Builtin::BIexpf:
2245 case Builtin::BIexpl:
2246 case Builtin::BI__builtin_exp:
2247 case Builtin::BI__builtin_expf:
2248 case Builtin::BI__builtin_expf16:
2249 case Builtin::BI__builtin_expl:
2250 case Builtin::BI__builtin_expf128:
2251 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2252 Intrinsic::exp,
2253 Intrinsic::experimental_constrained_exp));
2254
2255 case Builtin::BIexp2:
2256 case Builtin::BIexp2f:
2257 case Builtin::BIexp2l:
2258 case Builtin::BI__builtin_exp2:
2259 case Builtin::BI__builtin_exp2f:
2260 case Builtin::BI__builtin_exp2f16:
2261 case Builtin::BI__builtin_exp2l:
2262 case Builtin::BI__builtin_exp2f128:
2263 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2264 Intrinsic::exp2,
2265 Intrinsic::experimental_constrained_exp2));
2266
2267 case Builtin::BIfabs:
2268 case Builtin::BIfabsf:
2269 case Builtin::BIfabsl:
2270 case Builtin::BI__builtin_fabs:
2271 case Builtin::BI__builtin_fabsf:
2272 case Builtin::BI__builtin_fabsf16:
2273 case Builtin::BI__builtin_fabsl:
2274 case Builtin::BI__builtin_fabsf128:
2275 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
2276
2277 case Builtin::BIfloor:
2278 case Builtin::BIfloorf:
2279 case Builtin::BIfloorl:
2280 case Builtin::BI__builtin_floor:
2281 case Builtin::BI__builtin_floorf:
2282 case Builtin::BI__builtin_floorf16:
2283 case Builtin::BI__builtin_floorl:
2284 case Builtin::BI__builtin_floorf128:
2285 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2286 Intrinsic::floor,
2287 Intrinsic::experimental_constrained_floor));
2288
2289 case Builtin::BIfma:
2290 case Builtin::BIfmaf:
2291 case Builtin::BIfmal:
2292 case Builtin::BI__builtin_fma:
2293 case Builtin::BI__builtin_fmaf:
2294 case Builtin::BI__builtin_fmaf16:
2295 case Builtin::BI__builtin_fmal:
2296 case Builtin::BI__builtin_fmaf128:
2297 return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
2298 Intrinsic::fma,
2299 Intrinsic::experimental_constrained_fma));
2300
2301 case Builtin::BIfmax:
2302 case Builtin::BIfmaxf:
2303 case Builtin::BIfmaxl:
2304 case Builtin::BI__builtin_fmax:
2305 case Builtin::BI__builtin_fmaxf:
2306 case Builtin::BI__builtin_fmaxf16:
2307 case Builtin::BI__builtin_fmaxl:
2308 case Builtin::BI__builtin_fmaxf128:
2309 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2310 Intrinsic::maxnum,
2311 Intrinsic::experimental_constrained_maxnum));
2312
2313 case Builtin::BIfmin:
2314 case Builtin::BIfminf:
2315 case Builtin::BIfminl:
2316 case Builtin::BI__builtin_fmin:
2317 case Builtin::BI__builtin_fminf:
2318 case Builtin::BI__builtin_fminf16:
2319 case Builtin::BI__builtin_fminl:
2320 case Builtin::BI__builtin_fminf128:
2321 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2322 Intrinsic::minnum,
2323 Intrinsic::experimental_constrained_minnum));
2324
2325 // fmod() is a special case. It maps to the frem instruction rather than an
2326 // LLVM intrinsic.
2327 case Builtin::BIfmod:
2328 case Builtin::BIfmodf:
2329 case Builtin::BIfmodl:
2330 case Builtin::BI__builtin_fmod:
2331 case Builtin::BI__builtin_fmodf:
2332 case Builtin::BI__builtin_fmodf16:
2333 case Builtin::BI__builtin_fmodl:
2334 case Builtin::BI__builtin_fmodf128: {
2335 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2336 Value *Arg1 = EmitScalarExpr(E->getArg(0));
2337 Value *Arg2 = EmitScalarExpr(E->getArg(1));
2338 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
2339 }
2340
2341 case Builtin::BIlog:
2342 case Builtin::BIlogf:
2343 case Builtin::BIlogl:
2344 case Builtin::BI__builtin_log:
2345 case Builtin::BI__builtin_logf:
2346 case Builtin::BI__builtin_logf16:
2347 case Builtin::BI__builtin_logl:
2348 case Builtin::BI__builtin_logf128:
2349 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2350 Intrinsic::log,
2351 Intrinsic::experimental_constrained_log));
2352
2353 case Builtin::BIlog10:
2354 case Builtin::BIlog10f:
2355 case Builtin::BIlog10l:
2356 case Builtin::BI__builtin_log10:
2357 case Builtin::BI__builtin_log10f:
2358 case Builtin::BI__builtin_log10f16:
2359 case Builtin::BI__builtin_log10l:
2360 case Builtin::BI__builtin_log10f128:
2361 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2362 Intrinsic::log10,
2363 Intrinsic::experimental_constrained_log10));
2364
2365 case Builtin::BIlog2:
2366 case Builtin::BIlog2f:
2367 case Builtin::BIlog2l:
2368 case Builtin::BI__builtin_log2:
2369 case Builtin::BI__builtin_log2f:
2370 case Builtin::BI__builtin_log2f16:
2371 case Builtin::BI__builtin_log2l:
2372 case Builtin::BI__builtin_log2f128:
2373 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2374 Intrinsic::log2,
2375 Intrinsic::experimental_constrained_log2));
2376
2377 case Builtin::BInearbyint:
2378 case Builtin::BInearbyintf:
2379 case Builtin::BInearbyintl:
2380 case Builtin::BI__builtin_nearbyint:
2381 case Builtin::BI__builtin_nearbyintf:
2382 case Builtin::BI__builtin_nearbyintl:
2383 case Builtin::BI__builtin_nearbyintf128:
2384 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2385 Intrinsic::nearbyint,
2386 Intrinsic::experimental_constrained_nearbyint));
2387
2388 case Builtin::BIpow:
2389 case Builtin::BIpowf:
2390 case Builtin::BIpowl:
2391 case Builtin::BI__builtin_pow:
2392 case Builtin::BI__builtin_powf:
2393 case Builtin::BI__builtin_powf16:
2394 case Builtin::BI__builtin_powl:
2395 case Builtin::BI__builtin_powf128:
2396 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2397 Intrinsic::pow,
2398 Intrinsic::experimental_constrained_pow));
2399
2400 case Builtin::BIrint:
2401 case Builtin::BIrintf:
2402 case Builtin::BIrintl:
2403 case Builtin::BI__builtin_rint:
2404 case Builtin::BI__builtin_rintf:
2405 case Builtin::BI__builtin_rintf16:
2406 case Builtin::BI__builtin_rintl:
2407 case Builtin::BI__builtin_rintf128:
2408 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2409 Intrinsic::rint,
2410 Intrinsic::experimental_constrained_rint));
2411
2412 case Builtin::BIround:
2413 case Builtin::BIroundf:
2414 case Builtin::BIroundl:
2415 case Builtin::BI__builtin_round:
2416 case Builtin::BI__builtin_roundf:
2417 case Builtin::BI__builtin_roundf16:
2418 case Builtin::BI__builtin_roundl:
2419 case Builtin::BI__builtin_roundf128:
2420 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2421 Intrinsic::round,
2422 Intrinsic::experimental_constrained_round));
2423
2424 case Builtin::BIsin:
2425 case Builtin::BIsinf:
2426 case Builtin::BIsinl:
2427 case Builtin::BI__builtin_sin:
2428 case Builtin::BI__builtin_sinf:
2429 case Builtin::BI__builtin_sinf16:
2430 case Builtin::BI__builtin_sinl:
2431 case Builtin::BI__builtin_sinf128:
2432 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2433 Intrinsic::sin,
2434 Intrinsic::experimental_constrained_sin));
2435
2436 case Builtin::BIsqrt:
2437 case Builtin::BIsqrtf:
2438 case Builtin::BIsqrtl:
2439 case Builtin::BI__builtin_sqrt:
2440 case Builtin::BI__builtin_sqrtf:
2441 case Builtin::BI__builtin_sqrtf16:
2442 case Builtin::BI__builtin_sqrtl:
2443 case Builtin::BI__builtin_sqrtf128:
2444 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2445 Intrinsic::sqrt,
2446 Intrinsic::experimental_constrained_sqrt));
2447
2448 case Builtin::BItrunc:
2449 case Builtin::BItruncf:
2450 case Builtin::BItruncl:
2451 case Builtin::BI__builtin_trunc:
2452 case Builtin::BI__builtin_truncf:
2453 case Builtin::BI__builtin_truncf16:
2454 case Builtin::BI__builtin_truncl:
2455 case Builtin::BI__builtin_truncf128:
2456 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2457 Intrinsic::trunc,
2458 Intrinsic::experimental_constrained_trunc));
2459
2460 case Builtin::BIlround:
2461 case Builtin::BIlroundf:
2462 case Builtin::BIlroundl:
2463 case Builtin::BI__builtin_lround:
2464 case Builtin::BI__builtin_lroundf:
2465 case Builtin::BI__builtin_lroundl:
2466 case Builtin::BI__builtin_lroundf128:
2467 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2468 *this, E, Intrinsic::lround,
2469 Intrinsic::experimental_constrained_lround));
2470
2471 case Builtin::BIllround:
2472 case Builtin::BIllroundf:
2473 case Builtin::BIllroundl:
2474 case Builtin::BI__builtin_llround:
2475 case Builtin::BI__builtin_llroundf:
2476 case Builtin::BI__builtin_llroundl:
2477 case Builtin::BI__builtin_llroundf128:
2478 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2479 *this, E, Intrinsic::llround,
2480 Intrinsic::experimental_constrained_llround));
2481
2482 case Builtin::BIlrint:
2483 case Builtin::BIlrintf:
2484 case Builtin::BIlrintl:
2485 case Builtin::BI__builtin_lrint:
2486 case Builtin::BI__builtin_lrintf:
2487 case Builtin::BI__builtin_lrintl:
2488 case Builtin::BI__builtin_lrintf128:
2489 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2490 *this, E, Intrinsic::lrint,
2491 Intrinsic::experimental_constrained_lrint));
2492
2493 case Builtin::BIllrint:
2494 case Builtin::BIllrintf:
2495 case Builtin::BIllrintl:
2496 case Builtin::BI__builtin_llrint:
2497 case Builtin::BI__builtin_llrintf:
2498 case Builtin::BI__builtin_llrintl:
2499 case Builtin::BI__builtin_llrintf128:
2500 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2501 *this, E, Intrinsic::llrint,
2502 Intrinsic::experimental_constrained_llrint));
2503
2504 default:
2505 break;
2506 }
2507 }
2508
2509 switch (BuiltinIDIfNoAsmLabel) {
2510 default: break;
2511 case Builtin::BI__builtin___CFStringMakeConstantString:
2512 case Builtin::BI__builtin___NSStringMakeConstantString:
2513 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
2514 case Builtin::BI__builtin_stdarg_start:
2515 case Builtin::BI__builtin_va_start:
2516 case Builtin::BI__va_start:
2517 case Builtin::BI__builtin_va_end:
2518 return RValue::get(
2519 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
2520 ? EmitScalarExpr(E->getArg(0))
2521 : EmitVAListRef(E->getArg(0)).getPointer(),
2522 BuiltinID != Builtin::BI__builtin_va_end));
2523 case Builtin::BI__builtin_va_copy: {
2524 Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
2525 Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
2526
2527 llvm::Type *Type = Int8PtrTy;
2528
2529 DstPtr = Builder.CreateBitCast(DstPtr, Type);
2530 SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
2531 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
2532 {DstPtr, SrcPtr}));
2533 }
2534 case Builtin::BI__builtin_abs:
2535 case Builtin::BI__builtin_labs:
2536 case Builtin::BI__builtin_llabs: {
2537 // X < 0 ? -X : X
2538 // The negation has 'nsw' because abs of INT_MIN is undefined.
2539 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2540 Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg");
2541 Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType());
2542 Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2543 Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs");
2544 return RValue::get(Result);
2545 }
2546 case Builtin::BI__builtin_complex: {
2547 Value *Real = EmitScalarExpr(E->getArg(0));
2548 Value *Imag = EmitScalarExpr(E->getArg(1));
2549 return RValue::getComplex({Real, Imag});
2550 }
2551 case Builtin::BI__builtin_conj:
2552 case Builtin::BI__builtin_conjf:
2553 case Builtin::BI__builtin_conjl:
2554 case Builtin::BIconj:
2555 case Builtin::BIconjf:
2556 case Builtin::BIconjl: {
2557 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
2558 Value *Real = ComplexVal.first;
2559 Value *Imag = ComplexVal.second;
2560 Imag = Builder.CreateFNeg(Imag, "neg");
2561 return RValue::getComplex(std::make_pair(Real, Imag));
2562 }
2563 case Builtin::BI__builtin_creal:
2564 case Builtin::BI__builtin_crealf:
2565 case Builtin::BI__builtin_creall:
2566 case Builtin::BIcreal:
2567 case Builtin::BIcrealf:
2568 case Builtin::BIcreall: {
2569 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
2570 return RValue::get(ComplexVal.first);
2571 }
2572
2573 case Builtin::BI__builtin_preserve_access_index: {
2574 // Only enable the preserved access index region when debug info
2575 // is available, as debug info is needed to preserve the user-level
2576 // access pattern.
2577 if (!getDebugInfo()) {
2578 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
2579 return RValue::get(EmitScalarExpr(E->getArg(0)));
2580 }
2581
2582 // Nested builtin_preserve_access_index() not supported
2584 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
2585 return RValue::get(EmitScalarExpr(E->getArg(0)));
2586 }
2587
2588 IsInPreservedAIRegion = true;
2589 Value *Res = EmitScalarExpr(E->getArg(0));
2590 IsInPreservedAIRegion = false;
2591 return RValue::get(Res);
2592 }
2593
2594 case Builtin::BI__builtin_cimag:
2595 case Builtin::BI__builtin_cimagf:
2596 case Builtin::BI__builtin_cimagl:
2597 case Builtin::BIcimag:
2598 case Builtin::BIcimagf:
2599 case Builtin::BIcimagl: {
2600 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
2601 return RValue::get(ComplexVal.second);
2602 }
2603
2604 case Builtin::BI__builtin_clrsb:
2605 case Builtin::BI__builtin_clrsbl:
2606 case Builtin::BI__builtin_clrsbll: {
2607 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
2608 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2609
2610 llvm::Type *ArgType = ArgValue->getType();
2611 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
2612
2613 llvm::Type *ResultType = ConvertType(E->getType());
2614 Value *Zero = llvm::Constant::getNullValue(ArgType);
2615 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
2616 Value *Inverse = Builder.CreateNot(ArgValue, "not");
2617 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
2618 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
2619 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
2620 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2621 "cast");
2622 return RValue::get(Result);
2623 }
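// clrsb counts the redundant sign bits below the sign bit; for 32 bits:
//   __builtin_clrsb(0) == 31, __builtin_clrsb(-1) == 31,
//   __builtin_clrsb(1) == 30, __builtin_clrsb(-2) == 30.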
2624 case Builtin::BI__builtin_ctzs:
2625 case Builtin::BI__builtin_ctz:
2626 case Builtin::BI__builtin_ctzl:
2627 case Builtin::BI__builtin_ctzll: {
2628 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
2629
2630 llvm::Type *ArgType = ArgValue->getType();
2631 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
2632
2633 llvm::Type *ResultType = ConvertType(E->getType());
2634 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
2635 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
2636 if (Result->getType() != ResultType)
2637 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2638 "cast");
2639 return RValue::get(Result);
2640 }
2641 case Builtin::BI__builtin_clzs:
2642 case Builtin::BI__builtin_clz:
2643 case Builtin::BI__builtin_clzl:
2644 case Builtin::BI__builtin_clzll: {
2645 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
2646
2647 llvm::Type *ArgType = ArgValue->getType();
2648 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
2649
2650 llvm::Type *ResultType = ConvertType(E->getType());
2651 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
2652 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
2653 if (Result->getType() != ResultType)
2654 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2655 "cast");
2656 return RValue::get(Result);
2657 }
2658 case Builtin::BI__builtin_ffs:
2659 case Builtin::BI__builtin_ffsl:
2660 case Builtin::BI__builtin_ffsll: {
2661 // ffs(x) -> x ? cttz(x) + 1 : 0
2662 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2663
2664 llvm::Type *ArgType = ArgValue->getType();
2665 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
2666
2667 llvm::Type *ResultType = ConvertType(E->getType());
2668 Value *Tmp =
2669 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
2670 llvm::ConstantInt::get(ArgType, 1));
2671 Value *Zero = llvm::Constant::getNullValue(ArgType);
2672 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
2673 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
2674 if (Result->getType() != ResultType)
2675 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2676 "cast");
2677 return RValue::get(Result);
2678 }
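// ffs numbers bits from 1 and reserves 0 for "no bit set", e.g.:
//   __builtin_ffs(0) == 0, __builtin_ffs(1) == 1, __builtin_ffs(8) == 4.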
2679 case Builtin::BI__builtin_parity:
2680 case Builtin::BI__builtin_parityl:
2681 case Builtin::BI__builtin_parityll: {
2682 // parity(x) -> ctpop(x) & 1
2683 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2684
2685 llvm::Type *ArgType = ArgValue->getType();
2686 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
2687
2688 llvm::Type *ResultType = ConvertType(E->getType());
2689 Value *Tmp = Builder.CreateCall(F, ArgValue);
2690 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
2691 if (Result->getType() != ResultType)
2692 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2693 "cast");
2694 return RValue::get(Result);
2695 }
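// parity is the low bit of the population count, e.g.:
//   __builtin_parity(0xB) == 1 (three set bits),
//   __builtin_parity(0x9) == 0 (two set bits).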
2696 case Builtin::BI__lzcnt16:
2697 case Builtin::BI__lzcnt:
2698 case Builtin::BI__lzcnt64: {
2699 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2700
2701 llvm::Type *ArgType = ArgValue->getType();
2702 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
2703
2704 llvm::Type *ResultType = ConvertType(E->getType());
2705 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
2706 if (Result->getType() != ResultType)
2707 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2708 "cast");
2709 return RValue::get(Result);
2710 }
2711 case Builtin::BI__popcnt16:
2712 case Builtin::BI__popcnt:
2713 case Builtin::BI__popcnt64:
2714 case Builtin::BI__builtin_popcount:
2715 case Builtin::BI__builtin_popcountl:
2716 case Builtin::BI__builtin_popcountll: {
2717 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2718
2719 llvm::Type *ArgType = ArgValue->getType();
2720 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
2721
2722 llvm::Type *ResultType = ConvertType(E->getType());
2723 Value *Result = Builder.CreateCall(F, ArgValue);
2724 if (Result->getType() != ResultType)
2725 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2726 "cast");
2727 return RValue::get(Result);
2728 }
2729 case Builtin::BI__builtin_unpredictable: {
2730 // Always return the argument of __builtin_unpredictable. LLVM does not
2731 // handle this builtin. Metadata for this builtin should be added directly
2732 // to instructions such as branches or switches that use it.
2733 return RValue::get(EmitScalarExpr(E->getArg(0)));
2734 }
2735 case Builtin::BI__builtin_expect: {
2736 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2737 llvm::Type *ArgType = ArgValue->getType();
2738
2739 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
2740 // Don't generate llvm.expect on -O0 as the backend won't use it for
2741 // anything.
2742 // Note, we still IRGen ExpectedValue because it could have side-effects.
2743 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
2744 return RValue::get(ArgValue);
2745
2746 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
2747 Value *Result =
2748 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
2749 return RValue::get(Result);
2750 }
2751 case Builtin::BI__builtin_expect_with_probability: {
2752 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2753 llvm::Type *ArgType = ArgValue->getType();
2754
2755 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
2756 llvm::APFloat Probability(0.0);
2757 const Expr *ProbArg = E->getArg(2);
2758 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
2759 assert(EvalSucceed && "probability should be able to evaluate as float");
2760 (void)EvalSucceed;
2761 bool LoseInfo = false;
2762 Probability.convert(llvm::APFloat::IEEEdouble(),
2763 llvm::RoundingMode::Dynamic, &LoseInfo);
2764 llvm::Type *Ty = ConvertType(ProbArg->getType());
2765 Constant *Confidence = ConstantFP::get(Ty, Probability);
2766 // Don't generate llvm.expect.with.probability on -O0 as the backend
2767 // won't use it for anything.
2768 // Note, we still IRGen ExpectedValue because it could have side-effects.
2769 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
2770 return RValue::get(ArgValue);
2771
2772 Function *FnExpect =
2773 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
2774 Value *Result = Builder.CreateCall(
2775 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
2776 return RValue::get(Result);
2777 }
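// Usage sketch: the probability must fold to a constant in [0.0, 1.0];
// at -O1 and above it becomes branch-weight metadata, e.g.
//   if (__builtin_expect_with_probability(X == 0, 1, 0.9)) { ... }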
2778 case Builtin::BI__builtin_assume_aligned: {
2779 const Expr *Ptr = E->getArg(0);
2780 Value *PtrValue = EmitScalarExpr(Ptr);
2781 Value *OffsetValue =
2782 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
2783
2784 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
2785 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
2786 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
2787 AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
2788 llvm::Value::MaximumAlignment);
2789
2790 emitAlignmentAssumption(PtrValue, Ptr,
2791 /*The expr loc is sufficient.*/ SourceLocation(),
2792 AlignmentCI, OffsetValue);
2793 return RValue::get(PtrValue);
2794 }
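// Usage sketch: the returned pointer carries the alignment assumption, and
// the optional third argument is a byte offset from the aligned boundary:
//   void *P = __builtin_assume_aligned(Q, 64);    // Q is 64-byte aligned
//   void *R = __builtin_assume_aligned(Q, 64, 8); // Q - 8 is 64-byte aligned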
2795 case Builtin::BI__assume:
2796 case Builtin::BI__builtin_assume: {
2797 if (E->getArg(0)->HasSideEffects(getContext()))
2798 return RValue::get(nullptr);
2799
2800 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2801 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
2802 return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
2803 }
2804 case Builtin::BI__arithmetic_fence: {
2805 // Create the builtin call if fast-math is enabled and the target
2806 // supports the builtin; otherwise just return the argument.
2807 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2808 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
2809 bool isArithmeticFenceEnabled =
2810 FMF.allowReassoc() &&
2811 getContext().getTargetInfo().checkArithmeticFenceSupported();
2812 QualType ArgType = E->getArg(0)->getType();
2813 if (ArgType->isComplexType()) {
2814 if (isArithmeticFenceEnabled) {
2815 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
2816 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
2817 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
2818 ConvertType(ElementType));
2819 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
2820 ConvertType(ElementType));
2821 return RValue::getComplex(std::make_pair(Real, Imag));
2822 }
2823 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
2824 Value *Real = ComplexVal.first;
2825 Value *Imag = ComplexVal.second;
2826 return RValue::getComplex(std::make_pair(Real, Imag));
2827 }
2828 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2829 if (isArithmeticFenceEnabled)
2830 return RValue::get(
2831 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
2832 return RValue::get(ArgValue);
2833 }
2834 case Builtin::BI__builtin_bswap16:
2835 case Builtin::BI__builtin_bswap32:
2836 case Builtin::BI__builtin_bswap64:
2837 case Builtin::BI_byteswap_ushort:
2838 case Builtin::BI_byteswap_ulong:
2839 case Builtin::BI_byteswap_uint64: {
2840 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
2841 }
2842 case Builtin::BI__builtin_bitreverse8:
2843 case Builtin::BI__builtin_bitreverse16:
2844 case Builtin::BI__builtin_bitreverse32:
2845 case Builtin::BI__builtin_bitreverse64: {
2846 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
2847 }
2848 case Builtin::BI__builtin_rotateleft8:
2849 case Builtin::BI__builtin_rotateleft16:
2850 case Builtin::BI__builtin_rotateleft32:
2851 case Builtin::BI__builtin_rotateleft64:
2852 case Builtin::BI_rotl8: // Microsoft variants of rotate left
2853 case Builtin::BI_rotl16:
2854 case Builtin::BI_rotl:
2855 case Builtin::BI_lrotl:
2856 case Builtin::BI_rotl64:
2857 return emitRotate(E, false);
2858
2859 case Builtin::BI__builtin_rotateright8:
2860 case Builtin::BI__builtin_rotateright16:
2861 case Builtin::BI__builtin_rotateright32:
2862 case Builtin::BI__builtin_rotateright64:
2863 case Builtin::BI_rotr8: // Microsoft variants of rotate right
2864 case Builtin::BI_rotr16:
2865 case Builtin::BI_rotr:
2866 case Builtin::BI_lrotr:
2867 case Builtin::BI_rotr64:
2868 return emitRotate(E, true);
2869
2870 case Builtin::BI__builtin_constant_p: {
2871 llvm::Type *ResultType = ConvertType(E->getType());
2872
2873 const Expr *Arg = E->getArg(0);
2874 QualType ArgType = Arg->getType();
2875 // FIXME: The allowance for Obj-C pointers and block pointers is historical
2876 // and likely a mistake.
2877 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
2878 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
2879 // Per the GCC documentation, only numeric constants are recognized after
2880 // inlining.
2881 return RValue::get(ConstantInt::get(ResultType, 0));
2882
2883 if (Arg->HasSideEffects(getContext()))
2884 // The argument is unevaluated, so be conservative if it might have
2885 // side-effects.
2886 return RValue::get(ConstantInt::get(ResultType, 0));
2887
2888 Value *ArgValue = EmitScalarExpr(Arg);
2889 if (ArgType->isObjCObjectPointerType()) {
2890 // Convert Objective-C objects to id because we cannot distinguish between
2891 // LLVM types for Obj-C classes as they are opaque.
2892 ArgType = CGM.getContext().getObjCIdType();
2893 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
2894 }
2895 Function *F =
2896 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
2897 Value *Result = Builder.CreateCall(F, ArgValue);
2898 if (Result->getType() != ResultType)
2899 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
2900 return RValue::get(Result);
2901 }
2902 case Builtin::BI__builtin_dynamic_object_size:
2903 case Builtin::BI__builtin_object_size: {
2904 unsigned Type =
2905 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
2906 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
2907
2908 // We pass this builtin onto the optimizer so that it can figure out the
2909 // object size in more complex cases.
2910 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
2911 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
2912 /*EmittedE=*/nullptr, IsDynamic));
2913 }
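// Usage sketch: bit 0 of Type asks for the closest surrounding subobject
// rather than the whole object; bit 1 asks for a minimum rather than a
// maximum estimate. For example:
//   char Buf[16];
//   __builtin_object_size(Buf + 4, 0) == 12; // bytes left in the object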
2914 case Builtin::BI__builtin_prefetch: {
2915 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
2916 // FIXME: Technically these constants should be of type 'int', yes?
2917 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
2918 llvm::ConstantInt::get(Int32Ty, 0);
2919 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
2920 llvm::ConstantInt::get(Int32Ty, 3);
2921 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
2922 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
2923 return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
2924 }
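// Usage sketch: rw is 0 (read) or 1 (write); locality runs from 0 (no
// temporal reuse) up to the default 3 (keep in all cache levels):
//   __builtin_prefetch(Ptr, /*rw=*/0, /*locality=*/3);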
2925 case Builtin::BI__builtin_readcyclecounter: {
2926 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
2927 return RValue::get(Builder.CreateCall(F));
2928 }
2929 case Builtin::BI__builtin___clear_cache: {
2930 Value *Begin = EmitScalarExpr(E->getArg(0));
2931 Value *End = EmitScalarExpr(E->getArg(1));
2932 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
2933 return RValue::get(Builder.CreateCall(F, {Begin, End}));
2934 }
2935 case Builtin::BI__builtin_trap:
2936 return RValue::get(EmitTrapCall(Intrinsic::trap));
2937 case Builtin::BI__debugbreak:
2938 return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
2939 case Builtin::BI__builtin_unreachable: {
2940 EmitUnreachable(E->getExprLoc());
2941
2942 // We do need to preserve an insertion point.
2943 EmitBlock(createBasicBlock("unreachable.cont"));
2944
2945 return RValue::get(nullptr);
2946 }
2947
2948 case Builtin::BI__builtin_powi:
2949 case Builtin::BI__builtin_powif:
2950 case Builtin::BI__builtin_powil: {
2951 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
2952 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
2953
2954 if (Builder.getIsFPConstrained()) {
2955 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2956 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
2957 Src0->getType());
2958 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
2959 }
2960
2961 Function *F = CGM.getIntrinsic(Intrinsic::powi,
2962 { Src0->getType(), Src1->getType() });
2963 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
2964 }
2965 case Builtin::BI__builtin_isgreater:
2966 case Builtin::BI__builtin_isgreaterequal:
2967 case Builtin::BI__builtin_isless:
2968 case Builtin::BI__builtin_islessequal:
2969 case Builtin::BI__builtin_islessgreater:
2970 case Builtin::BI__builtin_isunordered: {
2971 // Ordered comparisons: we know the arguments to these are matching scalar
2972 // floating point values.
2973 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2974 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
2975 Value *LHS = EmitScalarExpr(E->getArg(0));
2976 Value *RHS = EmitScalarExpr(E->getArg(1));
2977
2978 switch (BuiltinID) {
2979 default: llvm_unreachable("Unknown ordered comparison");
2980 case Builtin::BI__builtin_isgreater:
2981 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
2982 break;
2983 case Builtin::BI__builtin_isgreaterequal:
2984 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
2985 break;
2986 case Builtin::BI__builtin_isless:
2987 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
2988 break;
2989 case Builtin::BI__builtin_islessequal:
2990 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
2991 break;
2992 case Builtin::BI__builtin_islessgreater:
2993 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
2994 break;
2995 case Builtin::BI__builtin_isunordered:
2996 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
2997 break;
2998 }
2999 // ZExt bool to int type.
3000 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
3001 }
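// For illustration, __builtin_isless(x, y) on doubles therefore lowers
// roughly to:
//
//   %cmp = fcmp olt double %x, %y
//   %conv = zext i1 %cmp to i32
//
// The ordered predicates are false when either operand is NaN, which is
// exactly the quiet, non-signaling behavior these macros require;
// isunordered instead uses the "uno" predicate, true only for NaN inputs.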
3002 case Builtin::BI__builtin_isnan: {
3003 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3004 Value *V = EmitScalarExpr(E->getArg(0));
3005 llvm::Type *Ty = V->getType();
3006 const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
3007 if (!Builder.getIsFPConstrained() ||
3008 Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
3009 !Ty->isIEEE()) {
3010 V = Builder.CreateFCmpUNO(V, V, "cmp");
3011 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
3012 }
3013
3014 if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM))
3015 return RValue::get(Result);
3016
3017 // NaN has all exp bits set and a nonzero significand. Therefore:
3018 // isnan(V) == ((exp mask - abs(V)) < 0), where abs(V) clears only the sign bit.
3019 unsigned bitsize = Ty->getScalarSizeInBits();
3020 llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
3021 Value *IntV = Builder.CreateBitCast(V, IntTy);
3022 APInt AndMask = APInt::getSignedMaxValue(bitsize);
3023 Value *AbsV =
3024 Builder.CreateAnd(IntV, llvm::ConstantInt::get(IntTy, AndMask));
3025 APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
3026 Value *Sub =
3027 Builder.CreateSub(llvm::ConstantInt::get(IntTy, ExpMask), AbsV);
3028 // V = sign bit of Sub, i.e. V = (Sub < 0)
3029 V = Builder.CreateLShr(Sub, llvm::ConstantInt::get(IntTy, bitsize - 1));
3030 if (bitsize > 32)
3031 V = Builder.CreateTrunc(V, ConvertType(E->getType()));
3032 return RValue::get(V);
3033 }
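// A standalone sketch of the same integer test for binary32, assuming
// C++20 std::bit_cast (illustrative only):
//
//   uint32_t bits = std::bit_cast<uint32_t>(x);
//   uint32_t absv = bits & 0x7fffffffu;   // clear the sign bit
//   uint32_t sub  = 0x7f800000u - absv;   // exp mask - abs(x)
//   bool nan      = (sub >> 31) != 0;     // borrow occurred <=> NaN
//
// Only a NaN has abs(x) strictly greater than the exponent mask, so only
// then does the subtraction go negative.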
3034
3035 case Builtin::BI__builtin_elementwise_abs: {
3036 Value *Result;
3037 QualType QT = E->getArg(0)->getType();
3038
3039 if (auto *VecTy = QT->getAs<VectorType>())
3040 QT = VecTy->getElementType();
3041 if (QT->isIntegerType())
3042 Result = Builder.CreateBinaryIntrinsic(
3043 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
3044 Builder.getFalse(), nullptr, "elt.abs");
3045 else
3046 Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs");
3047
3048 return RValue::get(Result);
3049 }
3050
3051 case Builtin::BI__builtin_elementwise_ceil:
3052 return RValue::get(
3053 emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil"));
3054 case Builtin::BI__builtin_elementwise_floor:
3055 return RValue::get(
3056 emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor"));
3057 case Builtin::BI__builtin_elementwise_roundeven:
3058 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven,
3059 "elt.roundeven"));
3060 case Builtin::BI__builtin_elementwise_trunc:
3061 return RValue::get(
3062 emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
3063
3064 case Builtin::BI__builtin_elementwise_add_sat:
3065 case Builtin::BI__builtin_elementwise_sub_sat: {
3066 Value *Op0 = EmitScalarExpr(E->getArg(0));
3067 Value *Op1 = EmitScalarExpr(E->getArg(1));
3068 Value *Result;
3069 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
3070 QualType Ty = E->getArg(0)->getType();
3071 if (auto *VecTy = Ty->getAs<VectorType>())
3072 Ty = VecTy->getElementType();
3073 bool IsSigned = Ty->isSignedIntegerType();
3074 unsigned Opc;
3075 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
3076 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
3077 else
3078 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
3079 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
3080 return RValue::get(Result);
3081 }
3082
3083 case Builtin::BI__builtin_elementwise_max: {
3084 Value *Op0 = EmitScalarExpr(E->getArg(0));
3085 Value *Op1 = EmitScalarExpr(E->getArg(1));
3086 Value *Result;
3087 if (Op0->getType()->isIntOrIntVectorTy()) {
3088 QualType Ty = E->getArg(0)->getType();
3089 if (auto *VecTy = Ty->getAs<VectorType>())
3090 Ty = VecTy->getElementType();
3091 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3092 ? llvm::Intrinsic::smax
3093 : llvm::Intrinsic::umax,
3094 Op0, Op1, nullptr, "elt.max");
3095 } else
3096 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
3097 return RValue::get(Result);
3098 }
3099 case Builtin::BI__builtin_elementwise_min: {
3100 Value *Op0 = EmitScalarExpr(E->getArg(0));
3101 Value *Op1 = EmitScalarExpr(E->getArg(1));
3102 Value *Result;
3103 if (Op0->getType()->isIntOrIntVectorTy()) {
3104 QualType Ty = E->getArg(0)->getType();
3105 if (auto *VecTy = Ty->getAs<VectorType>())
3106 Ty = VecTy->getElementType();
3107 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3108 ? llvm::Intrinsic::smin
3109 : llvm::Intrinsic::umin,
3110 Op0, Op1, nullptr, "elt.min");
3111 } else
3112 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
3113 return RValue::get(Result);
3114 }
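// For illustration, with a vector of unsigned elements,
//
//   typedef unsigned u32x4 __attribute__((ext_vector_type(4))); // illustrative
//   u32x4 m = __builtin_elementwise_max(a, b);
//
// lowers roughly to:
//
//   %elt.max = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
//
// while floating-point element types take the maxnum/minnum path instead.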
3115
3116 case Builtin::BI__builtin_reduce_max: {
3117 auto GetIntrinsicID = [](QualType QT) {
3118 if (auto *VecTy = QT->getAs<VectorType>())
3119 QT = VecTy->getElementType();
3120 if (QT->isSignedIntegerType())
3121 return llvm::Intrinsic::vector_reduce_smax;
3122 if (QT->isUnsignedIntegerType())
3123 return llvm::Intrinsic::vector_reduce_umax;
3124 assert(QT->isFloatingType() && "must have a float here");
3125 return llvm::Intrinsic::vector_reduce_fmax;
3126 };
3127 return RValue::get(emitUnaryBuiltin(
3128 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.max"));
3129 }
3130
3131 case Builtin::BI__builtin_reduce_min: {
3132 auto GetIntrinsicID = [](QualType QT) {
3133 if (auto *VecTy = QT->getAs<VectorType>())
3134 QT = VecTy->getElementType();
3135 if (QT->isSignedIntegerType())
3136 return llvm::Intrinsic::vector_reduce_smin;
3137 if (QT->isUnsignedIntegerType())
3138 return llvm::Intrinsic::vector_reduce_umin;
3139 assert(QT->isFloatingType() && "must have a float here");
3140 return llvm::Intrinsic::vector_reduce_fmin;
3141 };
3142
3143 return RValue::get(emitUnaryBuiltin(
3144 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3145 }
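// For illustration, a signed reduction such as
//
//   typedef int i32x4 __attribute__((ext_vector_type(4))); // illustrative
//   int m = __builtin_reduce_min(v);
//
// lowers roughly to:
//
//   %rdx.min = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v)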
3146
3147 case Builtin::BI__builtin_reduce_add:
3148 return RValue::get(emitUnaryBuiltin(
3149 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
3150 case Builtin::BI__builtin_reduce_mul:
3151 return RValue::get(emitUnaryBuiltin(
3152 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
3153 case Builtin::BI__builtin_reduce_xor:
3154 return RValue::get(emitUnaryBuiltin(
3155 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
3156 case Builtin::BI__builtin_reduce_or:
3157 return RValue::get(emitUnaryBuiltin(
3158 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
3159 case Builtin::BI__builtin_reduce_and:
3160 return RValue::get(emitUnaryBuiltin(
3161 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
3162
3163 case Builtin::BI__builtin_matrix_transpose: {
3164 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
3165 Value *MatValue = EmitScalarExpr(E->getArg(0));
3166 MatrixBuilder MB(Builder);
3167 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
3168 MatrixTy->getNumColumns());
3169 return RValue::get(Result);
3170 }
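// For illustration, with -fenable-matrix a transpose such as
//
//   typedef float m2x3_t __attribute__((matrix_type(2, 3))); // illustrative
//   typedef float m3x2_t __attribute__((matrix_type(3, 2)));
//   m3x2_t t = __builtin_matrix_transpose(m); // m has type m2x3_t
//
// emits @llvm.matrix.transpose.* on the flattened vector, with
// Rows = 2 and Cols = 3 passed as immediate arguments.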
3171
3172 case Builtin::BI__builtin_matrix_column_major_load: {
3173 MatrixBuilder MB(Builder);
3174 // Emit everything that isn't dependent on the first parameter type
3175 Value *Stride = EmitScalarExpr(E->getArg(3));
3176 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
3177 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
3178 assert(PtrTy && "arg0 must be of pointer type");
3179 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3180
3181 Address Src = EmitPointerWithAlignment(E->getArg(0));
3182 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(),
3183 E->getArg(0)->getExprLoc(), FD, 0);
3184 Value *Result = MB.CreateColumnMajorLoad(
3185 Src.getElementType(), Src.getPointer(),
3186 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
3187 ResultTy->getNumRows(), ResultTy->getNumColumns(),
3188 "matrix");
3189 return RValue::get(Result);
3190 }
3191
3192 case Builtin::BI__builtin_matrix_column_major_store: {
3193 MatrixBuilder MB(Builder);
3194 Value *Matrix = EmitScalarExpr(E->getArg(0));
3195 Address Dst = EmitPointerWithAlignment(E->getArg(1));
3196 Value *Stride = EmitScalarExpr(E->getArg(2));
3197
3198 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
3199 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
3200 assert(PtrTy && "arg1 must be of pointer type");
3201 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3202
3203 EmitNonNullArgCheck(RValue::get(Dst.getPointer()), E->getArg(1)->getType(),
3204 E->getArg(1)->getExprLoc(), FD, 0);
3205 Value *Result = MB.CreateColumnMajorStore(
3206 Matrix, Dst.getPointer(), Align(Dst.getAlignment().getQuantity()),
3207 Stride, IsVolatile, MatrixTy->getNumRows(), MatrixTy->getNumColumns());
3208 return RValue::get(Result);
3209 }
3210
3211 case Builtin::BIfinite:
3212 case Builtin::BI__finite:
3213 case Builtin::BIfinitef:
3214 case Builtin::BI__finitef:
3215 case Builtin::BIfinitel:
3216 case Builtin::BI__finitel:
3217 case Builtin::BI__builtin_isinf:
3218 case Builtin::BI__builtin_isfinite: {
3219 // isinf(x) --> fabs(x) == infinity
3220 // isfinite(x) --> fabs(x) != infinity
3221 // x != NaN via the ordered compare in either case.
3222 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3223 Value *V = EmitScalarExpr(E->getArg(0));
3224 llvm::Type *Ty = V->getType();
3225 if (!Builder.getIsFPConstrained() ||
3226 Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
3227 !Ty->isIEEE()) {
3228 Value *Fabs = EmitFAbs(*this, V);
3229 Constant *Infinity = ConstantFP::getInfinity(V->getType());
3230 CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
3231 ? CmpInst::FCMP_OEQ
3232 : CmpInst::FCMP_ONE;
3233 Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
3234 return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
3235 }
3236
3237 if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM))
3238 return RValue::get(Result);
3239
3240 // Inf values have all exp bits set and a zero significand. Therefore:
3241 // isinf(V) == ((V << 1) == ((exp mask) << 1))
3242 // isfinite(V) == ((V << 1) < ((exp mask) << 1)) using unsigned comparison
3243 unsigned bitsize = Ty->getScalarSizeInBits();
3244 llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
3245 Value *IntV = Builder.CreateBitCast(V, IntTy);
3246 Value *Shl1 = Builder.CreateShl(IntV, 1);
3247 const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
3248 APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
3249 Value *ExpMaskShl1 = llvm::ConstantInt::get(IntTy, ExpMask.shl(1));
3250 if (BuiltinID == Builtin::BI__builtin_isinf)
3251 V = Builder.CreateICmpEQ(Shl1, ExpMaskShl1);
3252 else
3253 V = Builder.CreateICmpULT(Shl1, ExpMaskShl1);
3254 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
3255 }
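// A standalone sketch of the shift trick for binary32 (illustrative only,
// assuming C++20 std::bit_cast): shifting left by one discards the sign
// bit, so
//
//   uint32_t w  = std::bit_cast<uint32_t>(x) << 1;
//   bool inf    = (w == (0x7f800000u << 1)); // exp all ones, significand 0
//   bool finite = (w <  (0x7f800000u << 1)); // unsigned compare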
3256
3257 case Builtin::BI__builtin_isinf_sign: {
3258 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
3259 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3260 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
3261 Value *Arg = EmitScalarExpr(E->getArg(0));
3262 Value *AbsArg = EmitFAbs(*this, Arg);
3263 Value *IsInf = Builder.CreateFCmpOEQ(
3264 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
3265 Value *IsNeg = EmitSignBit(*this, Arg);
3266
3267 llvm::Type *IntTy = ConvertType(E->getType());
3268 Value *Zero = Constant::getNullValue(IntTy);
3269 Value *One = ConstantInt::get(IntTy, 1);
3270 Value *NegativeOne = ConstantInt::get(IntTy, -1);
3271 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
3272 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
3273 return RValue::get(Result);
3274 }
3275
3276 case Builtin::BI__builtin_isnormal: {
3277 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
3278 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3279 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
3280 Value *V = EmitScalarExpr(E->getArg(0));
3281 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
3282
3283 Value *Abs = EmitFAbs(*this, V);
3284 Value *IsLessThanInf =
3285 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
3286 APFloat Smallest = APFloat::getSmallestNormalized(
3287 getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
3288 Value *IsNormal =
3289 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
3290 "isnormal");
3291 V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
3292 V = Builder.CreateAnd(V, IsNormal, "and");
3293 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
3294 }
3295
3296 case Builtin::BI__builtin_flt_rounds: {
3297 Function *F = CGM.getIntrinsic(Intrinsic::flt_rounds);
3298
3299 llvm::Type *ResultType = ConvertType(E->getType());
3300 Value *Result = Builder.CreateCall(F);
3301 if (Result->getType() != ResultType)
3302 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3303 "cast");
3304 return RValue::get(Result);
3305 }
3306
3307 case Builtin::BI__builtin_fpclassify: {
3308 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3309 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
3310 Value *V = EmitScalarExpr(E->getArg(5));
3311 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
3312
3313 // Create Result
3314 BasicBlock *Begin = Builder.GetInsertBlock();
3315 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
3316 Builder.SetInsertPoint(End);
3317 PHINode *Result =
3318 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
3319 "fpclassify_result");
3320
3321 // if (V==0) return FP_ZERO
3322 Builder.SetInsertPoint(Begin);
3323 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
3324 "iszero");
3325 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
3326 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
3327 Builder.CreateCondBr(IsZero, End, NotZero);
3328 Result->addIncoming(ZeroLiteral, Begin);
3329
3330 // if (V != V) return FP_NAN
3331 Builder.SetInsertPoint(NotZero);
3332 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
3333 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
3334 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
3335 Builder.CreateCondBr(IsNan, End, NotNan);
3336 Result->addIncoming(NanLiteral, NotZero);
3337
3338 // if (fabs(V) == infinity) return FP_INFINITY
3339 Builder.SetInsertPoint(NotNan);
3340 Value *VAbs = EmitFAbs(*this, V);
3341 Value *IsInf =
3342 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
3343 "isinf");
3344 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
3345 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
3346 Builder.CreateCondBr(IsInf, End, NotInf);
3347 Result->addIncoming(InfLiteral, NotNan);
3348
3349 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
3350 Builder.SetInsertPoint(NotInf);
3351 APFloat Smallest = APFloat::getSmallestNormalized(
3352 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
3353 Value *IsNormal =
3354 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
3355 "isnormal");
3356 Value *NormalResult =
3357 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
3358 EmitScalarExpr(E->getArg(3)));
3359 Builder.CreateBr(End);
3360 Result->addIncoming(NormalResult, NotInf);
3361
3362 // return Result
3363 Builder.SetInsertPoint(End);
3364 return RValue::get(Result);
3365 }
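// The block structure above is equivalent to this chain, evaluated on
// argument 5 with the classification values taken from arguments 0-4
// (illustrative C; the *_lit names are hypothetical):
//
//   if (v == 0)              return zero_lit;      // arg 4
//   if (v != v)              return nan_lit;       // arg 0
//   if (fabs(v) == INFINITY) return inf_lit;       // arg 1
//   return fabs(v) >= MIN_NORMAL ? normal_lit      // arg 2
//                                : subnormal_lit;  // arg 3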
3366
3367 case Builtin::BIalloca:
3368 case Builtin::BI_alloca:
3369 case Builtin::BI__builtin_alloca_uninitialized:
3370 case Builtin::BI__builtin_alloca: {
3371 Value *Size = EmitScalarExpr(E->getArg(0));
3372 const TargetInfo &TI = getContext().getTargetInfo();
3373 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
3374 const Align SuitableAlignmentInBytes =
3375 CGM.getContext()
3376 .toCharUnitsFromBits(TI.getSuitableAlign())
3377 .getAsAlign();
3378 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
3379 AI->setAlignment(SuitableAlignmentInBytes);
3380 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
3381 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
3382 return RValue::get(AI);
3383 }
3384
3385 case Builtin::BI__builtin_alloca_with_align_uninitialized:
3386 case Builtin::BI__builtin_alloca_with_align: {
3387 Value *Size = EmitScalarExpr(E->getArg(0));
3388 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
3389 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
3390 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
3391 const Align AlignmentInBytes =
3392 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
3393 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
3394 AI->setAlignment(AlignmentInBytes);
3395 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
3396 initializeAlloca(*this, AI, Size, AlignmentInBytes);
3397 return RValue::get(AI);
3398 }
3399
3400 case Builtin::BIbzero:
3401 case Builtin::BI__builtin_bzero: {
3402 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3403 Value *SizeVal = EmitScalarExpr(E->getArg(1));
3404 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
3405 E->getArg(0)->getExprLoc(), FD, 0);
3406 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
3407 return RValue::get(nullptr);
3408 }
3409 case Builtin::BImemcpy:
3410 case Builtin::BI__builtin_memcpy:
3411 case Builtin::BImempcpy:
3412 case Builtin::BI__builtin_mempcpy: {
3413 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3414 Address Src = EmitPointerWithAlignment(E->getArg(1));
3415 Value *SizeVal = EmitScalarExpr(E->getArg(2));
3416 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
3417 E->getArg(0)->getExprLoc(), FD, 0);
3418 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
3419 E->getArg(1)->getExprLoc(), FD, 1);
3420 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
3421 if (BuiltinID == Builtin::BImempcpy ||
3422 BuiltinID == Builtin::BI__builtin_mempcpy)
3423 return RValue::get(Builder.CreateInBoundsGEP(Dest.getElementType(),
3424 Dest.getPointer(), SizeVal));
3425 else
3426 return RValue::get(Dest.getPointer());
3427 }
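// For illustration, the two families differ only in the return value:
// memcpy(d, s, n) returns d, while mempcpy(d, s, n) returns d + n,
// emitted above as an inbounds GEP one past the last byte copied:
//
//   %ret = getelementptr inbounds i8, ptr %d, i64 %n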
3428
3429 case Builtin::BI__builtin_memcpy_inline: {
3430 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3431 Address Src = EmitPointerWithAlignment(E->getArg(1));
3432 uint64_t Size =
3433 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
3434 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
3435 E->getArg(0)->getExprLoc(), FD, 0);
3436 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
3437 E->getArg(1)->getExprLoc(), FD, 1);
3438 Builder.CreateMemCpyInline(Dest, Src, Size);
3439 return RValue::get(nullptr);
3440 }
3441
3442 case Builtin::BI__builtin_char_memchr:
3443 BuiltinID = Builtin::BI__builtin_memchr;
3444 break;
3445
3446 case Builtin::BI__builtin___memcpy_chk: {
3447 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
3448 Expr::EvalResult SizeResult, DstSizeResult;
3449 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
3450 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
3451 break;
3452 llvm::APSInt Size = SizeResult.Val.getInt();
3453 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
3454 if (Size.ugt(DstSize))
3455 break;
3456 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3457 Address Src = EmitPointerWithAlignment(E->getArg(1));
3458 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
3459 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
3460 return RValue::get(Dest.getPointer());
3461 }
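// For illustration, with both size arguments constant the check folds
// away entirely:
//
//   __builtin___memcpy_chk(d, s, 8, 16); // 8 <= 16: plain 8-byte memcpy
//   __builtin___memcpy_chk(d, s, 16, 8); // not provably safe: falls
//                                        // through to the __memcpy_chk
//                                        // library call instead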
3462
3463 case Builtin::BI__builtin_objc_memmove_collectable: {
3464 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
3465 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
3466 Value *SizeVal = EmitScalarExpr(E->getArg(2));
3467 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
3468 DestAddr, SrcAddr, SizeVal);
3469 return RValue::get(DestAddr.getPointer());
3470 }
3471
3472 case Builtin::BI__builtin___memmove_chk: {
3473 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
3474 Expr::EvalResult SizeResult, DstSizeResult;
3475 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
3476 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
3477 break;
3478 llvm::APSInt Size = SizeResult.Val.getInt();
3479 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
3480 if (Size.ugt(DstSize))
3481 break;
3482 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3483 Address Src = EmitPointerWithAlignment(E->getArg(1));
3484 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
3485 Builder.CreateMemMove(Dest, Src, SizeVal, false);
3486 return RValue::get(Dest.getPointer());
3487 }
3488
3489 case Builtin::BImemmove:
3490 case Builtin::BI__builtin_memmove: {
3491 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3492 Address Src = EmitPointerWithAlignment(E->getArg(1));
3493 Value *SizeVal = EmitScalarExpr(E->getArg(2));
3494 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
3495 E->getArg(0)->getExprLoc(), FD, 0);
3496 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
3497 E->getArg(1)->getExprLoc(), FD, 1);
3498 Builder.CreateMemMove(Dest, Src, SizeVal, false);
3499 return RValue::get(Dest.getPointer());
3500 }
3501 case Builtin::BImemset:
3502 case Builtin::BI__builtin_memset: {
3503 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3504 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
3505 Builder.getInt8Ty());
3506 Value *SizeVal = EmitScalarExpr(E->getArg(2));
3507 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
3508 E->getArg(0)->getExprLoc(), FD, 0);
3509 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
3510 return RValue::get(Dest.getPointer());
3511 }
3512 case Builtin::BI__builtin_memset_inline: {
3513 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3514 Value *ByteVal =
3515 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
3516 uint64_t Size =
3517 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
3518 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
3519 E->getArg(0)->getExprLoc(), FD, 0);
3520 Builder.CreateMemSetInline(Dest, ByteVal, Size);
3521 return RValue::get(nullptr);
3522 }
3523 case Builtin::BI__builtin___memset_chk: {
3524 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
3525 Expr::EvalResult SizeResult, DstSizeResult;
3526 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
3527 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
3528 break;
3529 llvm::APSInt Size = SizeResult.Val.getInt();
3530 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
3531 if (Size.ugt(DstSize))
3532 break;
3533 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3534 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
3535 Builder.getInt8Ty());
3536 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
3537 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
3538 return RValue::get(Dest.getPointer());
3539 }
3540 case Builtin::BI__builtin_wmemchr: {
3541 // The MSVC runtime library does not provide a definition of wmemchr, so we
3542 // need an inline implementation.
3543 if (!getTarget().getTriple().isOSMSVCRT())
3544 break;
3545
3546 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
3547 Value *Str = EmitScalarExpr(E->getArg(0));
3548 Value *Chr = EmitScalarExpr(E->getArg(1));
3549 Value *Size = EmitScalarExpr(E->getArg(2));
3550
3551 BasicBlock *Entry = Builder.GetInsertBlock();
3552 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
3553 BasicBlock *Next = createBasicBlock("wmemchr.next");
3554 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
3555 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
3556 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
3557
3558 EmitBlock(CmpEq);
3559 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
3560 StrPhi->addIncoming(Str, Entry);
3561 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
3562 SizePhi->addIncoming(Size, Entry);
3563 CharUnits WCharAlign =
3564 getContext().getTypeAlignInChars(getContext().WCharTy);
3565 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
3566 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
3567 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
3568 Builder.CreateCondBr(StrEqChr, Exit, Next);
3569
3570 EmitBlock(Next);
3571 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
3572 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
3573 Value *NextSizeEq0 =
3574 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
3575 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
3576 StrPhi->addIncoming(NextStr, Next);
3577 SizePhi->addIncoming(NextSize, Next);
3578
3579 EmitBlock(Exit);
3580 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
3581 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
3582 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
3583 Ret->addIncoming(FoundChr, CmpEq);
3584 return RValue::get(Ret);
3585 }
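// A C-level sketch of the loop emitted above (illustrative only):
//
//   wchar_t *wmemchr(const wchar_t *s, wchar_t c, size_t n) {
//     for (; n != 0; --n, ++s)
//       if (*s == c)
//         return (wchar_t *)s;
//     return 0;
//   }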
3586 case Builtin::BI__builtin_wmemcmp: {
3587 // The MSVC runtime library does not provide a definition of wmemcmp, so we
3588 // need an inline implementation.
3589 if (!getTarget().getTriple().isOSMSVCRT())
3590 break;
3591
3592 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
3593
3594 Value *Dst = EmitScalarExpr(E->getArg(0));
3595 Value *Src = EmitScalarExpr(E->getArg(1));
3596 Value *Size = EmitScalarExpr(E->getArg(2));
3597
3598 BasicBlock *Entry = Builder.GetInsertBlock();
3599 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
3600 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
3601 BasicBlock *Next = createBasicBlock("wmemcmp.next");
3602 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
3603 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
3604 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
3605
3606 EmitBlock(CmpGT);
3607 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
3608 DstPhi->addIncoming(Dst, Entry);
3609 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
3610 SrcPhi->addIncoming(Src, Entry);
3611 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
3612 SizePhi->addIncoming(Size, Entry);
3613 CharUnits WCharAlign =
3614 getContext().getTypeAlignInChars(getContext().WCharTy);
3615 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
3616 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
3617 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
3618 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
3619
3620 EmitBlock(CmpLT);
3621 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
3622 Builder.CreateCondBr(DstLtSrc, Exit, Next);
3623
3624 EmitBlock(Next);
3625 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
3626 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
3627 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
3628 Value *NextSizeEq0 =
3629 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
3630 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
3631 DstPhi->addIncoming(NextDst, Next);
3632 SrcPhi->addIncoming(NextSrc, Next);
3633 SizePhi->addIncoming(NextSize, Next);
3634
3635 EmitBlock(Exit);
3636 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
3637 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
3638 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
3639 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
3640 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
3641 return RValue::get(Ret);
3642 }
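// And the matching C-level sketch for the comparison loop (illustrative
// only; the elements are compared as unsigned values, matching the
// icmp ugt/ult above):
//
//   int wmemcmp(const wchar_t *d, const wchar_t *s, size_t n) {
//     for (; n != 0; --n, ++d, ++s) {
//       if (*d > *s) return 1;
//       if (*d < *s) return -1;
//     }
//     return 0;
//   }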
3643 case Builtin::BI__builtin_dwarf_cfa: {
3644 // The offset in bytes from the first argument to the CFA.
3645 //
3646 // Why on earth is this in the frontend? Is there any reason at
3647 // all that the backend can't reasonably determine this while
3648 // lowering llvm.eh.dwarf.cfa()?
3649 //
3650 // TODO: If there's a satisfactory reason, add a target hook for
3651 // this instead of hard-coding 0, which is correct for most targets.
3652 int32_t Offset = 0;
3653
3654 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
3655 return RValue::get(Builder.CreateCall(F,
3656 llvm::ConstantInt::get(Int32Ty, Offset)));
3657 }
3658 case Builtin::BI__builtin_return_address: {
3659 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
3660 getContext().UnsignedIntTy);
3661 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
3662 return RValue::get(Builder.CreateCall(F, Depth));
3663 }
3664 case Builtin::BI_ReturnAddress: {
3665 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
3666 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
3667 }
3668 case Builtin::BI__builtin_frame_address: {
3669 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
3670 getContext().UnsignedIntTy);
3671 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
3672 return RValue::get(Builder.CreateCall(F, Depth));
3673 }
3674 case Builtin::BI__builtin_extract_return_addr: {
3675 Value *Address = EmitScalarExpr(E->getArg(0));
3676 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
3677 return RValue::get(Result);
3678 }
3679 case Builtin::BI__builtin_frob_return_addr: {
3680 Value *Address = EmitScalarExpr(E->getArg(0));
3681 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
3682 return RValue::get(Result);
3683 }
3684 case Builtin::BI__builtin_dwarf_sp_column: {
3685 llvm::IntegerType *Ty
3686 = cast<llvm::IntegerType>(ConvertType(E->getType()));
3687 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
3688 if (Column == -1) {
3689 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
3690 return RValue::get(llvm::UndefValue::get(Ty));
3691 }
3692 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
3693 }
3694 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
3695 Value *Address = EmitScalarExpr(E->getArg(0));
3696 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
3697 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
3698 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
3699 }
3700 case Builtin::BI__builtin_eh_return: {
3701 Value *Int = EmitScalarExpr(E->getArg(0));
3702 Value *Ptr = EmitScalarExpr(E->getArg(1));
3703
3704 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
3705 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
3706 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
3707 Function *F =
3708 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
3709 : Intrinsic::eh_return_i64);
3710 Builder.CreateCall(F, {Int, Ptr});
3711 Builder.CreateUnreachable();
3712
3713 // We do need to preserve an insertion point.
3714 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
3715
3716 return RValue::get(nullptr);
3717 }
3718 case Builtin::BI__builtin_unwind_init: {
3719 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
3720 return RValue::get(Builder.CreateCall(F));
3721 }
3722 case Builtin::BI__builtin_extend_pointer: {
3723 // Extends a pointer to the size of an _Unwind_Word, which is
3724 // uint64_t on all platforms. Generally this gets poked into a
3725 // register and eventually used as an address, so if the
3726 // addressing registers are wider than pointers and the platform
3727 // doesn't implicitly ignore high-order bits when doing
3728 // addressing, we need to make sure we zext / sext based on
3729 // the platform's expectations.
3730 //
3731 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
3732
3733 // Cast the pointer to intptr_t.
3734 Value *Ptr = EmitScalarExpr(E->getArg(0));
3735 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
3736
3737 // If that's 64 bits, we're done.
3738 if (IntPtrTy->getBitWidth() == 64)
3739 return RValue::get(Result);
3740
3741 // Otherwise, ask the codegen data what to do.
3742 if (getTargetHooks().extendPointerWithSExt())
3743 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
3744 else
3745 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
3746 }
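// For illustration, on a 32-bit target whose ABI sign-extends pointers
// this produces roughly:
//
//   %extend.cast = ptrtoint ptr %p to i32
//   %extend.sext = sext i32 %extend.cast to i64
//
// Most targets take the zext path instead.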
3747 case Builtin::BI__builtin_setjmp: {
3748 // Buffer is a void**.
3749 Address Buf = EmitPointerWithAlignment(E->getArg(0));
3750
3751 // Store the frame pointer to the setjmp buffer.
3752 Value *FrameAddr = Builder.CreateCall(
3753 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
3754 ConstantInt::get(Int32Ty, 0));
3755 Builder.CreateStore(FrameAddr, Buf);
3756
3757 // Store the stack pointer to the setjmp buffer.
3758 Value *StackAddr =
3759 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
3760 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
3761 Builder.CreateStore(StackAddr, StackSaveSlot);
3762
3763 // Call LLVM's EH setjmp, which is lightweight.
3764 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
3765 Buf = Builder.CreateElementBitCast(Buf, Int8Ty);
3766 return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
3767 }
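// For illustration, the jmp_buf is treated as an array of void* slots:
//
//   buf[0] = frame address     (stored above)
//   buf[2] = @llvm.stacksave() (stored above)
//
// @llvm.eh.sjlj.setjmp fills in the remaining target-specific state
// (typically the resume address) itself.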
3768 case Builtin::BI__builtin_longjmp: {
3769 Value *Buf = EmitScalarExpr(E->getArg(0));
3770 Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
3771
3772 // Call LLVM's EH longjmp, which is lightweight.
3773 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
3774
3775 // longjmp doesn't return; mark this as unreachable.
3776 Builder.CreateUnreachable();
3777
3778 // We do need to preserve an insertion point.
3779 EmitBlock(createBasicBlock("longjmp.cont"));
3780
3781 return RValue::get(nullptr);
3782 }
3783 case Builtin::BI__builtin_launder: {
3784 const Expr *Arg = E->getArg(0);
3785 QualType ArgTy = Arg->getType()->getPointeeType();
3786 Value *Ptr = EmitScalarExpr(Arg);
3787 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
3788 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
3789
3790 return RValue::get(Ptr);
3791 }
3792 case Builtin::BI__sync_fetch_and_add:
3793 case Builtin::BI__sync_fetch_and_sub:
3794 case Builtin::BI__sync_fetch_and_or:
3795 case Builtin::BI__sync_fetch_and_and:
3796 case Builtin::BI__sync_fetch_and_xor:
3797 case Builtin::BI__sync_fetch_and_nand:
3798 case Builtin::BI__sync_add_and_fetch:
3799 case Builtin::BI__sync_sub_and_fetch:
3800 case Builtin::BI__sync_and_and_fetch:
3801 case Builtin::BI__sync_or_and_fetch:
3802 case Builtin::BI__sync_xor_and_fetch:
3803 case Builtin::BI__sync_nand_and_fetch:
3804 case Builtin::BI__sync_val_compare_and_swap:
3805 case Builtin::BI__sync_bool_compare_and_swap:
3806 case Builtin::BI__sync_lock_test_and_set:
3807 case Builtin::BI__sync_lock_release:
3808 case Builtin::BI__sync_swap:
3809 llvm_unreachable("Shouldn't make it through sema");
3810 case Builtin::BI__sync_fetch_and_add_1:
3811 case Builtin::BI__sync_fetch_and_add_2:
3812 case Builtin::BI__sync_fetch_and_add_4:
3813 case Builtin::BI__sync_fetch_and_add_8:
3814 case Builtin::BI__sync_fetch_and_add_16:
3815 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
3816 case Builtin::BI__sync_fetch_and_sub_1:
3817 case Builtin::BI__sync_fetch_and_sub_2:
3818 case Builtin::BI__sync_fetch_and_sub_4:
3819 case Builtin::BI__sync_fetch_and_sub_8:
3820 case Builtin::BI__sync_fetch_and_sub_16:
3821 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
3822 case Builtin::BI__sync_fetch_and_or_1:
3823 case Builtin::BI__sync_fetch_and_or_2:
3824 case Builtin::BI__sync_fetch_and_or_4:
3825 case Builtin::BI__sync_fetch_and_or_8:
3826 case Builtin::BI__sync_fetch_and_or_16:
3827 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
3828 case Builtin::BI__sync_fetch_and_and_1:
3829 case Builtin::BI__sync_fetch_and_and_2:
3830 case Builtin::BI__sync_fetch_and_and_4:
3831 case Builtin::BI__sync_fetch_and_and_8:
3832 case Builtin::BI__sync_fetch_and_and_16:
3833 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
3834 case Builtin::BI__sync_fetch_and_xor_1:
3835 case Builtin::BI__sync_fetch_and_xor_2:
3836 case Builtin::BI__sync_fetch_and_xor_4:
3837 case Builtin::BI__sync_fetch_and_xor_8:
3838 case Builtin::BI__sync_fetch_and_xor_16:
3839 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
3840 case Builtin::BI__sync_fetch_and_nand_1:
3841 case Builtin::BI__sync_fetch_and_nand_2:
3842 case Builtin::BI__sync_fetch_and_nand_4:
3843 case Builtin::BI__sync_fetch_and_nand_8:
3844 case Builtin::BI__sync_fetch_and_nand_16:
3845 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
3846
3847 // Clang extensions: not overloaded yet.
3848 case Builtin::BI__sync_fetch_and_min:
3849 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
3850 case Builtin::BI__sync_fetch_and_max:
3851 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
3852 case Builtin::BI__sync_fetch_and_umin:
3853 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
3854 case Builtin::BI__sync_fetch_and_umax:
3855 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
3856
3857 case Builtin::BI__sync_add_and_fetch_1:
3858 case Builtin::BI__sync_add_and_fetch_2:
3859 case Builtin::BI__sync_add_and_fetch_4:
3860 case Builtin::BI__sync_add_and_fetch_8:
3861 case Builtin::BI__sync_add_and_fetch_16:
3862 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
3863 llvm::Instruction::Add);
3864 case Builtin::BI__sync_sub_and_fetch_1:
3865 case Builtin::BI__sync_sub_and_fetch_2:
3866 case Builtin::BI__sync_sub_and_fetch_4:
3867 case Builtin::BI__sync_sub_and_fetch_8:
3868 case Builtin::BI__sync_sub_and_fetch_16:
3869 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
3870 llvm::Instruction::Sub);
3871 case Builtin::BI__sync_and_and_fetch_1:
3872 case Builtin::BI__sync_and_and_fetch_2:
3873 case Builtin::BI__sync_and_and_fetch_4:
3874 case Builtin::BI__sync_and_and_fetch_8:
3875 case Builtin::BI__sync_and_and_fetch_16:
3876 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
3877 llvm::Instruction::And);
3878 case Builtin::BI__sync_or_and_fetch_1:
3879 case Builtin::BI__sync_or_and_fetch_2:
3880 case Builtin::BI__sync_or_and_fetch_4:
3881 case Builtin::BI__sync_or_and_fetch_8:
3882 case Builtin::BI__sync_or_and_fetch_16:
3883 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
3884 llvm::Instruction::Or);
3885 case Builtin::BI__sync_xor_and_fetch_1:
3886 case Builtin::BI__sync_xor_and_fetch_2:
3887 case Builtin::BI__sync_xor_and_fetch_4:
3888 case Builtin::BI__sync_xor_and_fetch_8:
3889 case Builtin::BI__sync_xor_and_fetch_16:
3890 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
3891 llvm::Instruction::Xor);
3892 case Builtin::BI__sync_nand_and_fetch_1:
3893 case Builtin::BI__sync_nand_and_fetch_2:
3894 case Builtin::BI__sync_nand_and_fetch_4:
3895 case Builtin::BI__sync_nand_and_fetch_8:
3896 case Builtin::BI__sync_nand_and_fetch_16:
3897 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
3898 llvm::Instruction::And, true);
3899
3900 case Builtin::BI__sync_val_compare_and_swap_1:
3901 case Builtin::BI__sync_val_compare_and_swap_2:
3902 case Builtin::BI__sync_val_compare_and_swap_4:
3903 case Builtin::BI__sync_val_compare_and_swap_8:
3904 case Builtin::BI__sync_val_compare_and_swap_16:
3905 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
3906
3907 case Builtin::BI__sync_bool_compare_and_swap_1:
3908 case Builtin::BI__sync_bool_compare_and_swap_2:
3909 case Builtin::BI__sync_bool_compare_and_swap_4:
3910 case Builtin::BI__sync_bool_compare_and_swap_8:
3911 case Builtin::BI__sync_bool_compare_and_swap_16:
3912 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
3913
3914 case Builtin::BI__sync_swap_1:
3915 case Builtin::BI__sync_swap_2:
3916 case Builtin::BI__sync_swap_4:
3917 case Builtin::BI__sync_swap_8:
3918 case Builtin::BI__sync_swap_16:
3919 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
3920
3921 case Builtin::BI__sync_lock_test_and_set_1:
3922 case Builtin::BI__sync_lock_test_and_set_2:
3923 case Builtin::BI__sync_lock_test_and_set_4:
3924 case Builtin::BI__sync_lock_test_and_set_8:
3925 case Builtin::BI__sync_lock_test_and_set_16:
3926 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
3927
3928 case Builtin::BI__sync_lock_release_1:
3929 case Builtin::BI__sync_lock_release_2:
3930 case Builtin::BI__sync_lock_release_4:
3931 case Builtin::BI__sync_lock_release_8:
3932 case Builtin::BI__sync_lock_release_16: {
3933 Value *Ptr = EmitScalarExpr(E->getArg(0));
3934 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
3935 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
3936 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
3937 StoreSize.getQuantity() * 8);
3938 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
3939 llvm::StoreInst *Store =
3940 Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
3941 StoreSize);
3942 Store->setAtomic(llvm::AtomicOrdering::Release);
3943 return RValue::get(nullptr);
3944 }
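// For illustration, __sync_lock_release on an int flag emits roughly:
//
//   store atomic i32 0, ptr %flag release, align 4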
3945
3946 case Builtin::BI__sync_synchronize: {
3947 // We assume this is supposed to correspond to a C++0x-style
3948 // sequentially-consistent fence (i.e. this is only usable for
3949 // synchronization, not device I/O or anything like that). This intrinsic
3950 // is really badly designed in the sense that in theory, there isn't
3951 // any way to safely use it... but in practice, it mostly works
3952 // to use it with non-atomic loads and stores to get acquire/release
3953 // semantics.
3954 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
3955 return RValue::get(nullptr);
3956 }
3957
3958 case Builtin::BI__builtin_nontemporal_load:
3959 return RValue::get(EmitNontemporalLoad(*this, E));
3960 case Builtin::BI__builtin_nontemporal_store:
3961 return RValue::get(EmitNontemporalStore(*this, E));
3962 case Builtin::BI__c11_atomic_is_lock_free:
3963 case Builtin::BI__atomic_is_lock_free: {
3964 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
3965 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
3966 // _Atomic(T) is always properly-aligned.
3967 const char *LibCallName = "__atomic_is_lock_free";
3968 CallArgList Args;
3969 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
3970 getContext().getSizeType());
3971 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
3972 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
3973 getContext().VoidPtrTy);
3974 else
3975 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
3976 getContext().VoidPtrTy);
3977 const CGFunctionInfo &FuncInfo =
3978 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
3979 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
3980 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
3981 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
3982 ReturnValueSlot(), Args);
3983 }
3984
3985 case Builtin::BI__atomic_test_and_set: {
3986 // Look at the argument type to determine whether this is a volatile
3987 // operation. The parameter type is always volatile.
3988 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
3989 bool Volatile =
3990 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
3991
3992 Value *Ptr = EmitScalarExpr(E->getArg(0));
3993 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
3994 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
3995 Value *NewVal = Builder.getInt8(1);
3996 Value *Order = EmitScalarExpr(E->getArg(1));
3997 if (isa<llvm::ConstantInt>(Order)) {
3998 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
3999 AtomicRMWInst *Result = nullptr;
4000 switch (ord) {
4001 case 0: // memory_order_relaxed
4002 default: // invalid order
4003 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4004 llvm::AtomicOrdering::Monotonic);
4005 break;
4006 case 1: // memory_order_consume
4007 case 2: // memory_order_acquire
4008 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4009 llvm::AtomicOrdering::Acquire);
4010 break;
4011 case 3: // memory_order_release
4012 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4013 llvm::AtomicOrdering::Release);
4014 break;
4015 case 4: // memory_order_acq_rel
4016
4017 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4018 llvm::AtomicOrdering::AcquireRelease);
4019 break;
4020 case 5: // memory_order_seq_cst
4021 Result = Builder.CreateAtomicRMW(
4022 llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4023 llvm::AtomicOrdering::SequentiallyConsistent);
4024 break;
4025 }
4026 Result->setVolatile(Volatile);
4027 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4028 }
4029
4030 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4031
4032 llvm::BasicBlock *BBs[5] = {
4033 createBasicBlock("monotonic", CurFn),
4034 createBasicBlock("acquire", CurFn),
4035 createBasicBlock("release", CurFn),
4036 createBasicBlock("acqrel", CurFn),
4037 createBasicBlock("seqcst", CurFn)
4038 };
4039 llvm::AtomicOrdering Orders[5] = {
4040 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
4041 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
4042 llvm::AtomicOrdering::SequentiallyConsistent};
4043
4044 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4045 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4046
4047 Builder.SetInsertPoint(ContBB);
4048 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
4049
4050 for (unsigned i = 0; i < 5; ++i) {
4051 Builder.SetInsertPoint(BBs[i]);
4052 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
4053 Ptr, NewVal, Orders[i]);
4054 RMW->setVolatile(Volatile);
4055 Result->addIncoming(RMW, BBs[i]);
4056 Builder.CreateBr(ContBB);
4057 }
4058
4059 SI->addCase(Builder.getInt32(0), BBs[0]);
4060 SI->addCase(Builder.getInt32(1), BBs[1]);
4061 SI->addCase(Builder.getInt32(2), BBs[1]);
4062 SI->addCase(Builder.getInt32(3), BBs[2]);
4063 SI->addCase(Builder.getInt32(4), BBs[3]);
4064 SI->addCase(Builder.getInt32(5), BBs[4]);
4065
4066 Builder.SetInsertPoint(ContBB);
4067 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4068 }
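// For illustration, with a constant order no switch is needed; e.g.
// __atomic_test_and_set(p, __ATOMIC_SEQ_CST) becomes roughly:
//
//   %old    = atomicrmw xchg ptr %p, i8 1 seq_cst
//   %tobool = icmp ne i8 %old, 0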
4069
4070 case Builtin::BI__atomic_clear: {
4071 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4072 bool Volatile =
4073 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4074
4075 Address Ptr = EmitPointerWithAlignment(E->getArg(0));
4076 Ptr = Builder.CreateElementBitCast(Ptr, Int8Ty);
4077 Value *NewVal = Builder.getInt8(0);
4078 Value *Order = EmitScalarExpr(E->getArg(1));
4079 if (isa<llvm::ConstantInt>(Order)) {
4080 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4081 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4082 switch (ord) {
4083 case 0: // memory_order_relaxed
4084 default: // invalid order
4085 Store->setOrdering(llvm::AtomicOrdering::Monotonic);
4086 break;
4087 case 3: // memory_order_release
4088 Store->setOrdering(llvm::AtomicOrdering::Release);
4089 break;
4090 case 5: // memory_order_seq_cst
4091 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
4092 break;
4093 }
4094 return RValue::get(nullptr);
4095 }
4096
4097 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4098
4099 llvm::BasicBlock *BBs[3] = {
4100 createBasicBlock("monotonic", CurFn),
4101 createBasicBlock("release", CurFn),
4102 createBasicBlock("seqcst", CurFn)
4103 };
4104 llvm::AtomicOrdering Orders[3] = {
4105 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
4106 llvm::AtomicOrdering::SequentiallyConsistent};
4107
4108 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4109 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4110
4111 for (unsigned i = 0; i < 3; ++i) {
4112 Builder.SetInsertPoint(BBs[i]);
4113 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4114 Store->setOrdering(Orders[i]);
4115 Builder.CreateBr(ContBB);
4116 }
4117
4118 SI->addCase(Builder.getInt32(0), BBs[0]);
4119 SI->addCase(Builder.getInt32(3), BBs[1]);
4120 SI->addCase(Builder.getInt32(5), BBs[2]);
4121
4122 Builder.SetInsertPoint(ContBB);
4123 return RValue::get(nullptr);
4124 }
4125
4126 case Builtin::BI__atomic_thread_fence:
4127 case Builtin::BI__atomic_signal_fence:
4128 case Builtin::BI__c11_atomic_thread_fence:
4129 case Builtin::BI__c11_atomic_signal_fence: {
4130 llvm::SyncScope::ID SSID;
4131 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
4132 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
4133 SSID = llvm::SyncScope::SingleThread;
4134 else
4135 SSID = llvm::SyncScope::System;
4136 Value *Order = EmitScalarExpr(E->getArg(0));
4137 if (isa<llvm::ConstantInt>(Order)) {
4138 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4139 switch (ord) {
4140 case 0: // memory_order_relaxed
4141 default: // invalid order
4142 break;
4143 case 1: // memory_order_consume
4144 case 2: // memory_order_acquire
4145 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4146 break;
4147 case 3: // memory_order_release
4148 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4149 break;
4150 case 4: // memory_order_acq_rel
4151 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4152 break;
4153 case 5: // memory_order_seq_cst
4154 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4155 break;
4156 }
4157 return RValue::get(nullptr);
4158 }
4159
4160 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
4161 AcquireBB = createBasicBlock("acquire", CurFn);
4162 ReleaseBB = createBasicBlock("release", CurFn);
4163 AcqRelBB = createBasicBlock("acqrel", CurFn);
4164 SeqCstBB = createBasicBlock("seqcst", CurFn);
4165 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4166
4167 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4168 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
4169
4170 Builder.SetInsertPoint(AcquireBB);
4171 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4172 Builder.CreateBr(ContBB);
4173 SI->addCase(Builder.getInt32(1), AcquireBB);
4174 SI->addCase(Builder.getInt32(2), AcquireBB);
4175
4176 Builder.SetInsertPoint(ReleaseBB);
4177 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4178 Builder.CreateBr(ContBB);
4179 SI->addCase(Builder.getInt32(3), ReleaseBB);
4180
4181 Builder.SetInsertPoint(AcqRelBB);
4182 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4183 Builder.CreateBr(ContBB);
4184 SI->addCase(Builder.getInt32(4), AcqRelBB);
4185
4186 Builder.SetInsertPoint(SeqCstBB);
4187 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4188 Builder.CreateBr(ContBB);
4189 SI->addCase(Builder.getInt32(5), SeqCstBB);
4190
4191 Builder.SetInsertPoint(ContBB);
4192 return RValue::get(nullptr);
4193 }
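// For illustration, __atomic_thread_fence(__ATOMIC_ACQUIRE) emits
//
//   fence acquire
//
// while the signal-fence builtins use the single-thread scope:
//
//   fence syncscope("singlethread") acquire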
4194
4195 case Builtin::BI__builtin_signbit:
4196 case Builtin::BI__builtin_signbitf:
4197 case Builtin::BI__builtin_signbitl: {
4198 return RValue::get(
4199 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
4200 ConvertType(E->getType())));
4201 }
4202 case Builtin::BI__warn_memset_zero_len:
4203 return RValue::getIgnored();
4204 case Builtin::BI__annotation: {
4205 // Re-encode each wide string to UTF8 and make an MDString.
4206 SmallVector<Metadata *, 1> Strings;
4207 for (const Expr *Arg : E->arguments()) {
4208 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
4209 assert(Str->getCharByteWidth() == 2);
4210 StringRef WideBytes = Str->getBytes();
4211 std::string StrUtf8;
4212 if (!convertUTF16ToUTF8String(
4213 makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
4214 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
4215 continue;
4216 }
4217 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
4218 }
4219
4220 // Build an MDTuple of MDStrings and emit the intrinsic call.
4221 llvm::Function *F =
4222 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
4223 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
4224 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
4225 return RValue::getIgnored();
4226 }
4227 case Builtin::BI__builtin_annotation: {
4228 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
4229 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
4230 AnnVal->getType());
4231
4232 // Get the annotation string, go through casts. Sema requires this to be a
4233 // non-wide string literal, potentially cast, so the cast<> is safe.
4234 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
4235 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
4236 return RValue::get(
4237 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
4238 }
4239 case Builtin::BI__builtin_addcb:
4240 case Builtin::BI__builtin_addcs:
4241 case Builtin::BI__builtin_addc:
4242 case Builtin::BI__builtin_addcl:
4243 case Builtin::BI__builtin_addcll:
4244 case Builtin::BI__builtin_subcb:
4245 case Builtin::BI__builtin_subcs:
4246 case Builtin::BI__builtin_subc:
4247 case Builtin::BI__builtin_subcl:
4248 case Builtin::BI__builtin_subcll: {
4249
4250 // We translate all of these builtins from expressions of the form:
4251 // int x = ..., y = ..., carryin = ..., carryout, result;
4252 // result = __builtin_addc(x, y, carryin, &carryout);
4253 //
4254 // to LLVM IR of the form:
4255 //
4256 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
4257 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
4258 // %carry1 = extractvalue {i32, i1} %tmp1, 1
4259 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
4260 // i32 %carryin)
4261 // %result = extractvalue {i32, i1} %tmp2, 0
4262 // %carry2 = extractvalue {i32, i1} %tmp2, 1
4263 // %tmp3 = or i1 %carry1, %carry2
4264 // %tmp4 = zext i1 %tmp3 to i32
4265 // store i32 %tmp4, i32* %carryout
4266
4267 // Scalarize our inputs.
4268 llvm::Value *X = EmitScalarExpr(E->getArg(0));
4269 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
4270 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
4271 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
4272
4273 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
4274 llvm::Intrinsic::ID IntrinsicId;
4275 switch (BuiltinID) {
4276 default: llvm_unreachable("Unknown multiprecision builtin id.");
4277 case Builtin::BI__builtin_addcb:
4278 case Builtin::BI__builtin_addcs:
4279 case Builtin::BI__builtin_addc:
4280 case Builtin::BI__builtin_addcl:
4281 case Builtin::BI__builtin_addcll:
4282 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
4283 break;
4284 case Builtin::BI__builtin_subcb:
4285 case Builtin::BI__builtin_subcs:
4286 case Builtin::BI__builtin_subc:
4287 case Builtin::BI__builtin_subcl:
4288 case Builtin::BI__builtin_subcll:
4289 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
4290 break;
4291 }
4292
4293 // Construct our resulting LLVM IR expression.
4294 llvm::Value *Carry1;
4295 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
4296 X, Y, Carry1);
4297 llvm::Value *Carry2;
4298 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
4299 Sum1, Carryin, Carry2);
4300 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
4301 X->getType());
4302 Builder.CreateStore(CarryOut, CarryOutPtr);
4303 return RValue::get(Sum2);
4304 }
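// For illustration, chaining two limbs of a wider addition (variable
// names illustrative):
//
//   unsigned carry;
//   unsigned lo = __builtin_addc(a0, b0, 0, &carry);
//   unsigned hi = __builtin_addc(a1, b1, carry, &carry);
//
// Each call performs the two uadd.with.overflow steps shown above and
// ORs the two carry bits into *carryout.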
4305
4306 case Builtin::BI__builtin_add_overflow:
4307 case Builtin::BI__builtin_sub_overflow:
4308 case Builtin::BI__builtin_mul_overflow: {
4309 const clang::Expr *LeftArg = E->getArg(0);
4310 const clang::Expr *RightArg = E->getArg(1);
4311 const clang::Expr *ResultArg = E->getArg(2);
4312
4313 clang::QualType ResultQTy =
4314 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
4315
4316 WidthAndSignedness LeftInfo =
4317 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
4318 WidthAndSignedness RightInfo =
4319 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
4320 WidthAndSignedness ResultInfo =
4321 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
4322
4323 // Handle mixed-sign multiplication as a special case, because adding
4324 // runtime or backend support for our generic irgen would be too expensive.
4325 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
4326 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
4327 RightInfo, ResultArg, ResultQTy,
4328 ResultInfo);
4329
4330 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
4331 ResultInfo))
4332 return EmitCheckedUnsignedMultiplySignedResult(
4333 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
4334 ResultInfo);
4335
4336 WidthAndSignedness EncompassingInfo =
4337 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
4338
4339 llvm::Type *EncompassingLLVMTy =
4340 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
4341
4342 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
4343
4344 llvm::Intrinsic::ID IntrinsicId;
4345 switch (BuiltinID) {
4346 default:
4347 llvm_unreachable("Unknown overflow builtin id.");
4348 case Builtin::BI__builtin_add_overflow:
4349 IntrinsicId = EncompassingInfo.Signed
4350 ? llvm::Intrinsic::sadd_with_overflow
4351 : llvm::Intrinsic::uadd_with_overflow;
4352 break;
4353 case Builtin::BI__builtin_sub_overflow:
4354 IntrinsicId = EncompassingInfo.Signed
4355 ? llvm::Intrinsic::ssub_with_overflow
4356 : llvm::Intrinsic::usub_with_overflow;
4357 break;
4358 case Builtin::BI__builtin_mul_overflow:
4359 IntrinsicId = EncompassingInfo.Signed
4360 ? llvm::Intrinsic::smul_with_overflow
4361 : llvm::Intrinsic::umul_with_overflow;
4362 break;
4363 }
4364
4365 llvm::Value *Left = EmitScalarExpr(LeftArg);
4366 llvm::Value *Right = EmitScalarExpr(RightArg);
4367 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
4368
4369 // Extend each operand to the encompassing type.
4370 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
4371 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
4372
4373 // Perform the operation on the extended values.
4374 llvm::Value *Overflow, *Result;
4375 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
4376
4377 if (EncompassingInfo.Width > ResultInfo.Width) {
4378 // The encompassing type is wider than the result type, so we need to
4379 // truncate it.
4380 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
4381
4382 // To see if the truncation caused an overflow, we will extend
4383 // the result and then compare it to the original result.
4384 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
4385 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
4386 llvm::Value *TruncationOverflow =
4387 Builder.CreateICmpNE(Result, ResultTruncExt);
4388
4389 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
4390 Result = ResultTrunc;
4391 }
4392
4393 // Finally, store the result using the pointer.
4394 bool isVolatile =
4395 ResultArg->getType()->getPointeeType().isVolatileQualified();
4396 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
4397
4398 return RValue::get(Overflow);
4399 }
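// Illustrative sketch (not part of the original source): with 32-bit int and
// 64-bit long long, a mixed-type call such as
//
//   long long r;
//   bool o = __builtin_add_overflow(UINT_MAX, 1, &r);
//
// widens both operands to the signed encompassing type i64, emits
// llvm.sadd.with.overflow.i64, and, because the result type is already 64
// bits wide, needs no extra truncation check; `o` receives the i1 overflow
// flag (false here, since 2^32 fits in a long long).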
4400
4401 case Builtin::BI__builtin_uadd_overflow:
4402 case Builtin::BI__builtin_uaddl_overflow:
4403 case Builtin::BI__builtin_uaddll_overflow:
4404 case Builtin::BI__builtin_usub_overflow:
4405 case Builtin::BI__builtin_usubl_overflow:
4406 case Builtin::BI__builtin_usubll_overflow:
4407 case Builtin::BI__builtin_umul_overflow:
4408 case Builtin::BI__builtin_umull_overflow:
4409 case Builtin::BI__builtin_umulll_overflow:
4410 case Builtin::BI__builtin_sadd_overflow:
4411 case Builtin::BI__builtin_saddl_overflow:
4412 case Builtin::BI__builtin_saddll_overflow:
4413 case Builtin::BI__builtin_ssub_overflow:
4414 case Builtin::BI__builtin_ssubl_overflow:
4415 case Builtin::BI__builtin_ssubll_overflow:
4416 case Builtin::BI__builtin_smul_overflow:
4417 case Builtin::BI__builtin_smull_overflow:
4418 case Builtin::BI__builtin_smulll_overflow: {
4419
4420 // We translate all of these builtins directly to the relevant llvm IR node.
4421
4422 // Scalarize our inputs.
4423 llvm::Value *X = EmitScalarExpr(E->getArg(0));
4424 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
4425 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
4426
4427 // Decide which of the overflow intrinsics we are lowering to:
4428 llvm::Intrinsic::ID IntrinsicId;
4429 switch (BuiltinID) {
4430 default: llvm_unreachable("Unknown overflow builtin id.");
4431 case Builtin::BI__builtin_uadd_overflow:
4432 case Builtin::BI__builtin_uaddl_overflow:
4433 case Builtin::BI__builtin_uaddll_overflow:
4434 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
4435 break;
4436 case Builtin::BI__builtin_usub_overflow:
4437 case Builtin::BI__builtin_usubl_overflow:
4438 case Builtin::BI__builtin_usubll_overflow:
4439 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
4440 break;
4441 case Builtin::BI__builtin_umul_overflow:
4442 case Builtin::BI__builtin_umull_overflow:
4443 case Builtin::BI__builtin_umulll_overflow:
4444 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
4445 break;
4446 case Builtin::BI__builtin_sadd_overflow:
4447 case Builtin::BI__builtin_saddl_overflow:
4448 case Builtin::BI__builtin_saddll_overflow:
4449 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
4450 break;
4451 case Builtin::BI__builtin_ssub_overflow:
4452 case Builtin::BI__builtin_ssubl_overflow:
4453 case Builtin::BI__builtin_ssubll_overflow:
4454 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
4455 break;
4456 case Builtin::BI__builtin_smul_overflow:
4457 case Builtin::BI__builtin_smull_overflow:
4458 case Builtin::BI__builtin_smulll_overflow:
4459 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
4460 break;
4461 }
4462
4463
4464 llvm::Value *Carry;
4465 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
4466 Builder.CreateStore(Sum, SumOutPtr);
4467
4468 return RValue::get(Carry);
4469 }
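// Sketch of the lowering above (illustrative): a call such as
//
//   unsigned sum;
//   bool carry = __builtin_uadd_overflow(a, b, &sum);
//
// becomes a single llvm.uadd.with.overflow.i32 call; the sum element of the
// returned pair is stored through the out-pointer and the i1 overflow bit is
// returned as the builtin's value.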
4470 case Builtin::BIaddressof:
4471 case Builtin::BI__addressof:
4472 case Builtin::BI__builtin_addressof:
4473 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
4474 case Builtin::BI__builtin_function_start:
4475 return RValue::get(CGM.GetFunctionStart(
4476 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
4477 case Builtin::BI__builtin_operator_new:
4478 return EmitBuiltinNewDeleteCall(
4479 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
4480 case Builtin::BI__builtin_operator_delete:
4481 return EmitBuiltinNewDeleteCall(
4482 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
4483
4484 case Builtin::BI__builtin_is_aligned:
4485 return EmitBuiltinIsAligned(E);
4486 case Builtin::BI__builtin_align_up:
4487 return EmitBuiltinAlignTo(E, true);
4488 case Builtin::BI__builtin_align_down:
4489 return EmitBuiltinAlignTo(E, false);
4490
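// For reference, the usual source-level semantics of these alignment
// builtins (a sketch; the actual lowering lives in EmitBuiltinAlignTo and
// EmitBuiltinIsAligned, defined elsewhere in this file):
//
//   __builtin_is_aligned(p, 16)  ~  ((uintptr_t)p & 15) == 0
//   __builtin_align_up(p, 16)    ~  ((uintptr_t)p + 15) & ~(uintptr_t)15
//   __builtin_align_down(p, 16)  ~  (uintptr_t)p & ~(uintptr_t)15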
4491 case Builtin::BI__noop:
4492 // __noop always evaluates to an integer literal zero.
4493 return RValue::get(ConstantInt::get(IntTy, 0));
4494 case Builtin::BI__builtin_call_with_static_chain: {
4495 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
4496 const Expr *Chain = E->getArg(1);
4497 return EmitCall(Call->getCallee()->getType(),
4498 EmitCallee(Call->getCallee()), Call, ReturnValue,
4499 EmitScalarExpr(Chain));
4500 }
4501 case Builtin::BI_InterlockedExchange8:
4502 case Builtin::BI_InterlockedExchange16:
4503 case Builtin::BI_InterlockedExchange:
4504 case Builtin::BI_InterlockedExchangePointer:
4505 return RValue::get(
4506 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
4507 case Builtin::BI_InterlockedCompareExchangePointer:
4508 case Builtin::BI_InterlockedCompareExchangePointer_nf: {
4509 llvm::Type *RTy;
4510 llvm::IntegerType *IntType =
4511 IntegerType::get(getLLVMContext(),
4512 getContext().getTypeSize(E->getType()));
4513 llvm::Type *IntPtrType = IntType->getPointerTo();
4514
4515 llvm::Value *Destination =
4516 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
4517
4518 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
4519 RTy = Exchange->getType();
4520 Exchange = Builder.CreatePtrToInt(Exchange, IntType);
4521
4522 llvm::Value *Comparand =
4523 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
4524
4525 auto Ordering =
4526 BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
4527 AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
4528
4529 auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
4530 Ordering, Ordering);
4531 Result->setVolatile(true);
4532
4533 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
4534 0),
4535 RTy));
4536 }
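// Sketch of what the block above emits for 64-bit pointers (illustrative):
//
//   %r   = cmpxchg volatile i64* %Destination, i64 %Comparand, i64 %Exchange
//              seq_cst seq_cst               ; monotonic for the _nf variant
//   %old = extractvalue { i64, i1 } %r, 0
//
// followed by an inttoptr of %old back to the original pointer type.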
4537 case Builtin::BI_InterlockedCompareExchange8:
4538 case Builtin::BI_InterlockedCompareExchange16:
4539 case Builtin::BI_InterlockedCompareExchange:
4540 case Builtin::BI_InterlockedCompareExchange64:
4541 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
4542 case Builtin::BI_InterlockedIncrement16:
4543 case Builtin::BI_InterlockedIncrement:
4544 return RValue::get(
4545 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
4546 case Builtin::BI_InterlockedDecrement16:
4547 case Builtin::BI_InterlockedDecrement:
4548 return RValue::get(
4549 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
4550 case Builtin::BI_InterlockedAnd8:
4551 case Builtin::BI_InterlockedAnd16:
4552 case Builtin::BI_InterlockedAnd:
4553 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
4554 case Builtin::BI_InterlockedExchangeAdd8:
4555 case Builtin::BI_InterlockedExchangeAdd16:
4556 case Builtin::BI_InterlockedExchangeAdd:
4557 return RValue::get(
4558 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
4559 case Builtin::BI_InterlockedExchangeSub8:
4560 case Builtin::BI_InterlockedExchangeSub16:
4561 case Builtin::BI_InterlockedExchangeSub:
4562 return RValue::get(
4563 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
4564 case Builtin::BI_InterlockedOr8:
4565 case Builtin::BI_InterlockedOr16:
4566 case Builtin::BI_InterlockedOr:
4567 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
4568 case Builtin::BI_InterlockedXor8:
4569 case Builtin::BI_InterlockedXor16:
4570 case Builtin::BI_InterlockedXor:
4571 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
4572
4573 case Builtin::BI_bittest64:
4574 case Builtin::BI_bittest:
4575 case Builtin::BI_bittestandcomplement64:
4576 case Builtin::BI_bittestandcomplement:
4577 case Builtin::BI_bittestandreset64:
4578 case Builtin::BI_bittestandreset:
4579 case Builtin::BI_bittestandset64:
4580 case Builtin::BI_bittestandset:
4581 case Builtin::BI_interlockedbittestandreset:
4582 case Builtin::BI_interlockedbittestandreset64:
4583 case Builtin::BI_interlockedbittestandset64:
4584 case Builtin::BI_interlockedbittestandset:
4585 case Builtin::BI_interlockedbittestandset_acq:
4586 case Builtin::BI_interlockedbittestandset_rel:
4587 case Builtin::BI_interlockedbittestandset_nf:
4588 case Builtin::BI_interlockedbittestandreset_acq:
4589 case Builtin::BI_interlockedbittestandreset_rel:
4590 case Builtin::BI_interlockedbittestandreset_nf:
4591 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
4592
4593 // These builtins exist to emit regular volatile loads and stores not
4594 // affected by the -fms-volatile setting.
4595 case Builtin::BI__iso_volatile_load8:
4596 case Builtin::BI__iso_volatile_load16:
4597 case Builtin::BI__iso_volatile_load32:
4598 case Builtin::BI__iso_volatile_load64:
4599 return RValue::get(EmitISOVolatileLoad(*this, E));
4600 case Builtin::BI__iso_volatile_store8:
4601 case Builtin::BI__iso_volatile_store16:
4602 case Builtin::BI__iso_volatile_store32:
4603 case Builtin::BI__iso_volatile_store64:
4604 return RValue::get(EmitISOVolatileStore(*this, E));
4605
4606 case Builtin::BI__exception_code:
4607 case Builtin::BI_exception_code:
4608 return RValue::get(EmitSEHExceptionCode());
4609 case Builtin::BI__exception_info:
4610 case Builtin::BI_exception_info:
4611 return RValue::get(EmitSEHExceptionInfo());
4612 case Builtin::BI__abnormal_termination:
4613 case Builtin::BI_abnormal_termination:
4614 return RValue::get(EmitSEHAbnormalTermination());
4615 case Builtin::BI_setjmpex:
4616 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
4617 E->getArg(0)->getType()->isPointerType())
4618 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
4619 break;
4620 case Builtin::BI_setjmp:
4621 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
4622 E->getArg(0)->getType()->isPointerType()) {
4623 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
4624 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
4625 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
4626 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
4627 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
4628 }
4629 break;
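// Recap of the selection above (MSVCRT targets only): plain _setjmp maps to
// _setjmp3 on x86 and to _setjmpex on aarch64, falling back to _setjmp
// elsewhere; _setjmpex always uses _setjmpex. EmitMSVCRTSetJmp, defined
// earlier in this file, emits the chosen routine as a returns_twice call.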
4630
4631 // C++ std:: builtins.
4632 case Builtin::BImove:
4633 case Builtin::BImove_if_noexcept:
4634 case Builtin::BIforward:
4635 case Builtin::BIas_const:
4636 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
4637 case Builtin::BI__GetExceptionInfo: {
4638 if (llvm::GlobalVariable *GV =
4639 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
4640 return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
4641 break;
4642 }
4643
4644 case Builtin::BI__fastfail:
4645 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
4646
4647 case Builtin::BI__builtin_coro_size: {
4648 auto & Context = getContext();
4649 auto SizeTy = Context.getSizeType();
4650 auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
4651 Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
4652 return RValue::get(Builder.CreateCall(F));
4653 }
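// Sketch: on a target where size_t is 64 bits, __builtin_coro_size() becomes
//
//   %sz = call i64 @llvm.coro.size.i64()
//
// which the coroutine lowering passes later replace with the actual frame
// size.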
4654
4655 case Builtin::BI__builtin_coro_id:
4656 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
4657 case Builtin::BI__builtin_coro_promise:
4658 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
4659 case Builtin::BI__builtin_coro_resume:
4660 return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
4661 case Builtin::BI__builtin_coro_frame:
4662 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
4663 case Builtin::BI__builtin_coro_noop:
4664 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
4665 case Builtin::BI__builtin_coro_free:
4666 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
4667 case Builtin::BI__builtin_coro_destroy:
4668 return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
4669 case Builtin::BI__builtin_coro_done:
4670 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
4671 case Builtin::BI__builtin_coro_alloc:
4672 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
4673 case Builtin::BI__builtin_coro_begin:
4674 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
4675 case Builtin::BI__builtin_coro_end:
4676 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
4677 case Builtin::BI__builtin_coro_suspend:
4678 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
4679
4680 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
4681 case Builtin::BIread_pipe:
4682 case Builtin::BIwrite_pipe: {
4683 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
4684 *Arg1 = EmitScalarExpr(E->getArg(1));
4685 CGOpenCLRuntime OpenCLRT(CGM);
4686 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
4687 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
4688
4689 // Type of the generic packet parameter.
4690 unsigned GenericAS =
4691 getContext().getTargetAddressSpace(LangAS::opencl_generic);
4692 llvm::Type *I8PTy = llvm::PointerType::get(
4693 llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
4694
4695 // Testing which overloaded version we should generate the call for.
4696 if (2U == E->getNumArgs()) {
4697 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
4698 : "__write_pipe_2";
4699 // Creating a generic function type to be able to call with any builtin or
4700 // user defined type.
4701 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
4702 llvm::FunctionType *FTy = llvm::FunctionType::get(
4703 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
4704 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
4705 return RValue::get(
4706 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
4707 {Arg0, BCast, PacketSize, PacketAlign}));
4708 } else {
4709 assert(4 == E->getNumArgs() &&
4710 "Illegal number of parameters to pipe function");
4711 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
4712 : "__write_pipe_4";
4713
4714 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
4715 Int32Ty, Int32Ty};
4716 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
4717 *Arg3 = EmitScalarExpr(E->getArg(3));
4718 llvm::FunctionType *FTy = llvm::FunctionType::get(
4719 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
4720 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
4721 // We know the third argument is an integer type, but we may need to cast
4722 // it to i32.
4723 if (Arg2->getType() != Int32Ty)
4724 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
4725 return RValue::get(
4726 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
4727 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
4728 }
4729 }
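// Illustrative mapping from OpenCL source to the runtime calls built above
// (size/align are the pipe element's packet size and alignment):
//
//   read_pipe(p, ptr);            ->  __read_pipe_2(p, ptr, size, align)
//   read_pipe(p, rid, idx, ptr);  ->  __read_pipe_4(p, rid, idx, ptr, size, align)
//
// and likewise for write_pipe with the __write_pipe_* entry points.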
4730 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
4731 // functions
4732 case Builtin::BIreserve_read_pipe:
4733 case Builtin::BIreserve_write_pipe:
4734 case Builtin::BIwork_group_reserve_read_pipe:
4735 case Builtin::BIwork_group_reserve_write_pipe:
4736 case Builtin::BIsub_group_reserve_read_pipe:
4737 case Builtin::BIsub_group_reserve_write_pipe: {
4738 // Composing the mangled name for the function.
4739 const char *Name;
4740 if (BuiltinID == Builtin::BIreserve_read_pipe)
4741 Name = "__reserve_read_pipe";
4742 else if (BuiltinID == Builtin::BIreserve_write_pipe)
4743 Name = "__reserve_write_pipe";
4744 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
4745 Name = "__work_group_reserve_read_pipe";
4746 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
4747 Name = "__work_group_reserve_write_pipe";
4748 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
4749 Name = "__sub_group_reserve_read_pipe";
4750 else
4751 Name = "__sub_group_reserve_write_pipe";
4752
4753 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
4754 *Arg1 = EmitScalarExpr(E->getArg(1));
4755 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
4756 CGOpenCLRuntime OpenCLRT(CGM);
4757 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
4758 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
4759
4760 // Building the generic function prototype.
4761 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
4762 llvm::FunctionType *FTy = llvm::FunctionType::get(
4763 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
4764 // We know the second argument is an integer type, but we may need to cast
4765 // it to i32.
4766 if (Arg1->getType() != Int32Ty)
4767 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
4768 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
4769 {Arg0, Arg1, PacketSize, PacketAlign}));
4770 }
4771 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
4772 // functions
4773 case Builtin::BIcommit_read_pipe:
4774 case Builtin::BIcommit_write_pipe:
4775 case Builtin::BIwork_group_commit_read_pipe:
4776 case Builtin::BIwork_group_commit_write_pipe:
4777 case Builtin::BIsub_group_commit_read_pipe:
4778 case Builtin::BIsub_group_commit_write_pipe: {
4779 const char *Name;
4780 if (BuiltinID == Builtin::BIcommit_read_pipe)
4781 Name = "__commit_read_pipe";
4782 else if (BuiltinID == Builtin::BIcommit_write_pipe)
4783 Name = "__commit_write_pipe";
4784 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
4785 Name = "__work_group_commit_read_pipe";
4786 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
4787 Name = "__work_group_commit_write_pipe";
4788 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
4789 Name = "__sub_group_commit_read_pipe";
4790 else
4791 Name = "__sub_group_commit_write_pipe";
4792
4793 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
4794 *Arg1 = EmitScalarExpr(E->getArg(1));
4795 CGOpenCLRuntime OpenCLRT(CGM);
4796 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
4797 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
4798
4799 // Building the generic function prototype.
4800 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
4801 llvm::FunctionType *FTy =
4802 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
4803 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
4804
4805 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
4806 {Arg0, Arg1, PacketSize, PacketAlign}));
4807 }
4808 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
4809 case Builtin::BIget_pipe_num_packets:
4810 case Builtin::BIget_pipe_max_packets: {
4811 const char *BaseName;
4812 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
4813 if (BuiltinID == Builtin::BIget_pipe_num_packets)
4814 BaseName = "__get_pipe_num_packets";
4815 else
4816 BaseName = "__get_pipe_max_packets";
4817 std::string Name = std::string(BaseName) +
4818 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
4819
4820 // Building the generic function prototype.
4821 Value *Arg0 = EmitScalarExpr(E->getArg(0));
4822 CGOpenCLRuntime OpenCLRT(CGM);
4823 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
4824 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
4825 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
4826 llvm::FunctionType *FTy = llvm::FunctionType::get(
4827 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
4828
4829 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
4830 {Arg0, PacketSize, PacketAlign}));
4831 }
4832
4833 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
4834 case Builtin::BIto_global:
4835 case Builtin::BIto_local:
4836 case Builtin::BIto_private: {
4837 auto Arg0 = EmitScalarExpr(E->getArg(0));
4838 auto NewArgT = llvm::PointerType::get(Int8Ty,
4839 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
4840 auto NewRetT = llvm::PointerType::get(Int8Ty,
4841 CGM.getContext().getTargetAddressSpace(
4842 E->getType()->getPointeeType().getAddressSpace()));
4843 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
4844 llvm::Value *NewArg;
4845 if (Arg0->getType()->getPointerAddressSpace() !=
4846 NewArgT->getPointerAddressSpace())
4847 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
4848 else
4849 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
4850 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
4851 auto NewCall =
4852 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
4853 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
4854 ConvertType(E->getType())));
4855 }
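// Sketch (address-space numbers are target-dependent and purely
// illustrative): to_global(p) on a generic pointer is emitted as
//
//   %q = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %p)
//
// with bitcasts or addrspacecasts on either side to recover the
// source-level pointer type.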
4856
4857 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
4858 // It contains four different overload formats specified in Table 6.13.17.1.
4859 case Builtin::BIenqueue_kernel: {
4860 StringRef Name; // Generated function call name
4861 unsigned NumArgs = E->getNumArgs();
4862
4863 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
4864 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
4865 getContext().getTargetAddressSpace(LangAS::opencl_generic));
4866
4867 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
4868 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
4869 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
4870 llvm::Value *Range = NDRangeL.getAddress(*this).getPointer();
4871 llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType();
4872
4873 if (NumArgs == 4) {
4874 // The most basic form of the call with parameters:
4875 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
4876 Name = "__enqueue_kernel_basic";
4877 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
4878 GenericVoidPtrTy};
4879 llvm::FunctionType *FTy = llvm::FunctionType::get(
4880 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
4881
4882 auto Info =
4883 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
4884 llvm::Value *Kernel =
4885 Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
4886 llvm::Value *Block =
4887 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
4888
4889 AttrBuilder B(Builder.getContext());
4890 B.addByValAttr(NDRangeL.getAddress(*this).getElementType());
4891 llvm::AttributeList ByValAttrSet =
4892 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
4893
4894 auto RTCall =
4895 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
4896 {Queue, Flags, Range, Kernel, Block});
4897 RTCall->setAttributes(ByValAttrSet);
4898 return RValue::get(RTCall);
4899 }
4900 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
4901
4902 // Create a temporary array to hold the sizes of local pointer arguments
4903 // for the block. \p First is the position of the first size argument.
4904 auto CreateArrayForSizeVar = [=](unsigned First)
4905 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
4906 llvm::APInt ArraySize(32, NumArgs - First);
4907 QualType SizeArrayTy = getContext().getConstantArrayType(
4908 getContext().getSizeType(), ArraySize, nullptr, ArrayType::Normal,
4909 /*IndexTypeQuals=*/0);
4910 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
4911 llvm::Value *TmpPtr = Tmp.getPointer();
4912 llvm::Value *TmpSize = EmitLifetimeStart(
4913 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
4914 llvm::Value *ElemPtr;
4915 // Each of the following arguments specifies the size of the corresponding
4916 // argument passed to the enqueued block.
4917 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
4918 for (unsigned I = First; I < NumArgs; ++I) {
4919 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
4920 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
4921 {Zero, Index});
4922 if (I == First)
4923 ElemPtr = GEP;
4924 auto *V =
4925 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
4926 Builder.CreateAlignedStore(
4927 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
4928 }
4929 return std::tie(ElemPtr, TmpSize, TmpPtr);
4930 };
4931
4932 // Could have events and/or varargs.
4933 if (E->getArg(3)->getType()->isBlockPointerType()) {
4934 // No events passed, but has variadic arguments.
4935 Name = "__enqueue_kernel_varargs";
4936 auto Info =
4937 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
4938 llvm::Value *Kernel =
4939 Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
4940 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
4941 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
4942 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
4943
4944 // Create a vector of the arguments, as well as a constant value to
4945 // express to the runtime the number of variadic arguments.
4946 llvm::Value *const Args[] = {Queue, Flags,
4947 Range, Kernel,
4948 Block, ConstantInt::get(IntTy, NumArgs - 4),
4949 ElemPtr};
4950 llvm::Type *const ArgTys[] = {
4951 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
4952 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
4953
4954 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
4955 auto Call = RValue::get(
4956 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
4957 if (TmpSize)
4958 EmitLifetimeEnd(TmpSize, TmpPtr);
4959 return Call;
4960 }
4961 // Any calls now have event arguments passed.
4962 if (NumArgs >= 7) {
4963 llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
4964 llvm::PointerType *EventPtrTy = EventTy->getPointerTo(
4965 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
4966
4967 llvm::Value *NumEvents =
4968 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
4969
4970 // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments
4971 // to be a null pointer constant (including `0` literal), we can take it
4972 // into account and emit null pointer directly.
4973 llvm::Value *EventWaitList = nullptr;
4974 if (E->getArg(4)->isNullPointerConstant(
4975 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
4976 EventWaitList = llvm::ConstantPointerNull::get(EventPtrTy);
4977 } else {
4978 EventWaitList = E->getArg(4)->getType()->isArrayType()
4979 ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
4980 : EmitScalarExpr(E->getArg(4));
4981 // Convert to generic address space.
4982 EventWaitList = Builder.CreatePointerCast(EventWaitList, EventPtrTy);
4983 }
4984 llvm::Value *EventRet = nullptr;
4985 if (E->getArg(5)->isNullPointerConstant(
4986 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
4987 EventRet = llvm::ConstantPointerNull::get(EventPtrTy);
4988 } else {
4989 EventRet =
4990 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), EventPtrTy);
4991 }
4992
4993 auto Info =
4994 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
4995 llvm::Value *Kernel =
4996 Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
4997 llvm::Value *Block =
4998 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
4999
5000 std::vector<llvm::Type *> ArgTys = {
5001 QueueTy, Int32Ty, RangeTy, Int32Ty,
5002 EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
5003
5004 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
5005 NumEvents, EventWaitList, EventRet,
5006 Kernel, Block};
5007
5008 if (NumArgs == 7) {
5009 // Has events but no variadics.
5010 Name = "__enqueue_kernel_basic_events";
5011 llvm::FunctionType *FTy = llvm::FunctionType::get(
5012 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5013 return RValue::get(
5014 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5015 llvm::ArrayRef<llvm::Value *>(Args)));
5016 }
5017 // Has event info and variadics
5018 // Pass the number of variadics to the runtime function too.
5019 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
5020 ArgTys.push_back(Int32Ty);
5021 Name = "__enqueue_kernel_events_varargs";
5022
5023 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5024 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
5025 Args.push_back(ElemPtr);
5026 ArgTys.push_back(ElemPtr->getType());
5027
5028 llvm::FunctionType *FTy = llvm::FunctionType::get(
5029 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5030 auto Call =
5031 RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5032 llvm::ArrayRef<llvm::Value *>(Args)));
5033 if (TmpSize)
5034 EmitLifetimeEnd(TmpSize, TmpPtr);
5035 return Call;
5036 }
5037 LLVM_FALLTHROUGH;
5038 }
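// Recap of the runtime entry points chosen above, keyed on the call shape:
//
//   4 args                           -> __enqueue_kernel_basic
//   >4 args, block in arg position 3 -> __enqueue_kernel_varargs
//   7 args (events, no varargs)      -> __enqueue_kernel_basic_events
//   >7 args (events and varargs)     -> __enqueue_kernel_events_varargs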
5039 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
5040 // parameter.
5041 case Builtin::BIget_kernel_work_group_size: {
5042 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
5043 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5044 auto Info =
5045 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5046 Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
5047 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5048 return RValue::get(EmitRuntimeCall(
5049 CGM.CreateRuntimeFunction(
5050 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5051 false),
5052 "__get_kernel_work_group_size_impl"),
5053 {Kernel, Arg}));
5054 }
5055 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
5056 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
5057 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5058 auto Info =
5059 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5060 Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
5061 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5062 return RValue::get(EmitRuntimeCall(
5063 CGM.CreateRuntimeFunction(
5064 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5065 false),
5066 "__get_kernel_preferred_work_group_size_multiple_impl"),
5067 {Kernel, Arg}));
5068 }
5069 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
5070 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
5071 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
5072 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5073 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
5074 llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer();
5075 auto Info =
5076 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
5077 Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
5078 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5079 const char *Name =
5080 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
5081 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
5082 : "__get_kernel_sub_group_count_for_ndrange_impl";
5083 return RValue::get(EmitRuntimeCall(
5084 CGM.CreateRuntimeFunction(
5085 llvm::FunctionType::get(
5086 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
5087 false),
5088 Name),
5089 {NDRange, Kernel, Block}));
5090 }
5091
5092 case Builtin::BI__builtin_store_half:
5093 case Builtin::BI__builtin_store_halff: {
5094 Value *Val = EmitScalarExpr(E->getArg(0));
5095 Address Address = EmitPointerWithAlignment(E->getArg(1));
5096 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
5097 return RValue::get(Builder.CreateStore(HalfVal, Address));
5098 }
5099 case Builtin::BI__builtin_load_half: {
5100 Address Address = EmitPointerWithAlignment(E->getArg(0));
5101 Value *HalfVal = Builder.CreateLoad(Address);
5102 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
5103 }
5104 case Builtin::BI__builtin_load_halff: {
5105 Address Address = EmitPointerWithAlignment(E->getArg(0));
5106 Value *HalfVal = Builder.CreateLoad(Address);
5107 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
5108 }
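// Sketch of the conversions involved: __builtin_store_half and
// __builtin_store_halff emit an fptrunc of the double or float operand to
// half followed by a store; __builtin_load_half and __builtin_load_halff
// emit a load of half followed by an fpext to double or float, respectively:
//
//   %h = fptrunc double %val to half   ; store path
//   %d = fpext half %h to double       ; load path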
5109 case Builtin::BIprintf:
5110 if (getTarget().getTriple().isNVPTX() ||
5111 getTarget().getTriple().isAMDGCN()) {
5112 if (getLangOpts().OpenMPIsDevice)
5113 return EmitOpenMPDevicePrintfCallExpr(E);
5114 if (getTarget().getTriple().isNVPTX())
5115 return EmitNVPTXDevicePrintfCallExpr(E);
5116 if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
5117 return EmitAMDGPUDevicePrintfCallExpr(E);
5118 }
5119
5120 break;
5121 case Builtin::BI__builtin_canonicalize:
5122 case Builtin::BI__builtin_canonicalizef:
5123 case Builtin::BI__builtin_canonicalizef16:
5124 case Builtin::BI__builtin_canonicalizel:
5125 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
5126
5127 case Builtin::BI__builtin_thread_pointer: {
5128 if (!getContext().getTargetInfo().isTLSSupported())
5129 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
5130 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
5131 break;
5132 }
5133 case Builtin::BI__builtin_os_log_format:
5134 return emitBuiltinOSLogFormat(*E);
5135
5136 case Builtin::BI__xray_customevent: {
5137 if (!ShouldXRayInstrumentFunction())
5138 return RValue::getIgnored();
5139
5140 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
5141 XRayInstrKind::Custom))
5142 return RValue::getIgnored();
5143
5144 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
5145 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
5146 return RValue::getIgnored();
5147
5148 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
5149 auto FTy = F->getFunctionType();
5150 auto Arg0 = E->getArg(0);
5151 auto Arg0Val = EmitScalarExpr(Arg0);
5152 auto Arg0Ty = Arg0->getType();
5153 auto PTy0 = FTy->getParamType(0);
5154 if (PTy0 != Arg0Val->getType()) {
5155 if (Arg0Ty->isArrayType())
5156 Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
5157 else
5158 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
5159 }
5160 auto Arg1 = EmitScalarExpr(E->getArg(1));
5161 auto PTy1 = FTy->getParamType(1);
5162 if (PTy1 != Arg1->getType())
5163 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
5164 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
5165 }
5166
5167 case Builtin::BI__xray_typedevent: {
5168 // TODO: There should be a way to always emit events even if the current
5169 // function is not instrumented. Losing events in a stream can cripple
5170 // a trace.
5171 if (!ShouldXRayInstrumentFunction())
5172 return RValue::getIgnored();
5173
5174 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
5175 XRayInstrKind::Typed))
5176 return RValue::getIgnored();
5177
5178 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
5179 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
5180 return RValue::getIgnored();
5181
5182 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
5183 auto FTy = F->getFunctionType();
5184 auto Arg0 = EmitScalarExpr(E->getArg(0));
5185 auto PTy0 = FTy->getParamType(0);
5186 if (PTy0 != Arg0->getType())
5187 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
5188 auto Arg1 = E->getArg(1);
5189 auto Arg1Val = EmitScalarExpr(Arg1);
5190 auto Arg1Ty = Arg1->getType();
5191 auto PTy1 = FTy->getParamType(1);
5192 if (PTy1 != Arg1Val->getType()) {
5193 if (Arg1Ty->isArrayType())
5194 Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer();
5195 else
5196 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
5197 }
5198 auto Arg2 = EmitScalarExpr(E->getArg(2));
5199 auto PTy2 = FTy->getParamType(2);
5200 if (PTy2 != Arg2->getType())
5201 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
5202 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
5203 }
5204
5205 case Builtin::BI__builtin_ms_va_start:
5206 case Builtin::BI__builtin_ms_va_end:
5207 return RValue::get(
5208 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
5209 BuiltinID == Builtin::BI__builtin_ms_va_start));
5210
5211 case Builtin::BI__builtin_ms_va_copy: {
5212 // Lower this manually. We can't reliably determine whether or not any
5213 // given va_copy() is for a Win64 va_list from the calling convention
5214 // alone, because it's legal to do this from a System V ABI function.
5215 // With opaque pointer types, we won't have enough information in LLVM
5216 // IR to determine this from the argument types, either. Best to do it
5217 // now, while we have enough information.
5218 Address DestAddr = EmitMSVAListRef(E->getArg(0));
5219 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
5220
5221 llvm::Type *BPP = Int8PtrPtrTy;
5222
5223 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
5224 Int8PtrTy, DestAddr.getAlignment());
5225 SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
5226 Int8PtrTy, SrcAddr.getAlignment());
5227
5228 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
5229 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
5230 }
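// Sketch of the resulting IR under the Win64 ABI, where a va_list is a
// plain char*:
//
//   %ap.val = load i8*, i8** %ap
//   store i8* %ap.val, i8** %cp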
5231
5232 case Builtin::BI__builtin_get_device_side_mangled_name: {
5233 auto Name = CGM.getCUDARuntime().getDeviceSideName(
5234 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
5235 auto Str = CGM.GetAddrOfConstantCString(Name, "");
5236 llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
5237 llvm::ConstantInt::get(SizeTy, 0)};
5238 auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(),
5239 Str.getPointer(), Zeros);
5240 return RValue::get(Ptr);
5241 }
5242 }
5243
5244 // If this is an alias for a lib function (e.g. __builtin_sin), emit
5245 // the call using the normal call path, but using the unmangled
5246 // version of the function name.
5247 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
5248 return emitLibraryCall(*this, FD, E,
5249 CGM.getBuiltinLibFunction(FD, BuiltinID));
5250
5251 // If this is a predefined lib function (e.g. malloc), emit the call
5252 // using exactly the normal call path.
5253 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
5254 return emitLibraryCall(*this, FD, E,
5255 cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
5256
5257 // Check that a call to a target specific builtin has the correct target
5258 // features.
5259 // This check is down here so that non-target-specific builtins skip it;
5260 // however, if generic builtins ever start to require generic target
5261 // features, it can move up to the beginning of the function.
5262 checkTargetFeatures(E, FD);
5263
5264 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
5265 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
5266
5267 // See if we have a target specific intrinsic.
5268 const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
5269 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
5270 StringRef Prefix =
5271 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
5272 if (!Prefix.empty()) {
5273 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
5274 // NOTE: we don't need to perform a compatibility flag check here since the
5275 // intrinsics are declared in Builtins*.def via LANGBUILTIN, which gates the
5276 // MS builtins on ALL_MS_LANGUAGES, so they are filtered out earlier.
5277 if (IntrinsicID == Intrinsic::not_intrinsic)
5278 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
5279 }
5280
5281 if (IntrinsicID != Intrinsic::not_intrinsic) {
5282 SmallVector<Value *, 16> Args;
5283
5284 // Find out if any arguments are required to be integer constant
5285 // expressions.
5286 unsigned ICEArguments = 0;
5287 ASTContext::GetBuiltinTypeError Error;
5288 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5289 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5290
5291 Function *F = CGM.getIntrinsic(IntrinsicID);
5292 llvm::FunctionType *FTy = F->getFunctionType();
5293
5294 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
5295 Value *ArgValue;
5296 // If this is a normal argument, just emit it as a scalar.
5297 if ((ICEArguments & (1 << i)) == 0) {
5298 ArgValue = EmitScalarExpr(E->getArg(i));
5299 } else {
5300 // If this is required to be a constant, constant fold it so that we
5301 // know that the generated intrinsic gets a ConstantInt.
5302 ArgValue = llvm::ConstantInt::get(
5303 getLLVMContext(),
5304 *E->getArg(i)->getIntegerConstantExpr(getContext()));
5305 }
5306
5307 // If the intrinsic arg type is different from the builtin arg type
5308 // we need to do a bit cast.
5309 llvm::Type *PTy = FTy->getParamType(i);
5310 if (PTy != ArgValue->getType()) {
5311 // XXX - vector of pointers?
5312 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
5313 if (PtrTy->getAddressSpace() !=
5314 ArgValue->getType()->getPointerAddressSpace()) {
5315 ArgValue = Builder.CreateAddrSpaceCast(
5316 ArgValue,
5317 ArgValue->getType()->getPointerTo(PtrTy->getAddressSpace()));
5318 }
5319 }
5320
5321 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
5322 "Must be able to losslessly bit cast to param");
5323 // Cast vector type (e.g., v256i32) to x86_amx; this only happens
5324 // in AMX intrinsics.
5325 if (PTy->isX86_AMXTy())
5326 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
5327 {ArgValue->getType()}, {ArgValue});
5328 else
5329 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
5330 }
5331
5332 Args.push_back(ArgValue);
5333 }
5334
5335 Value *V = Builder.CreateCall(F, Args);
5336 QualType BuiltinRetType = E->getType();
5337
5338 llvm::Type *RetTy = VoidTy;
5339 if (!BuiltinRetType->isVoidType())
5340 RetTy = ConvertType(BuiltinRetType);
5341
5342 if (RetTy != V->getType()) {
5343 // XXX - vector of pointers?
5344 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
5345 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
5346 V = Builder.CreateAddrSpaceCast(
5347 V, V->getType()->getPointerTo(PtrTy->getAddressSpace()));
5348 }
5349 }
5350
5351 assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
5352 "Must be able to losslessly bit cast result type");
5353 // Cast x86_amx to vector type (e.g., v256i32); this only happens
5354 // in AMX intrinsics.
5355 if (V->getType()->isX86_AMXTy())
5356 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
5357 {V});
5358 else
5359 V = Builder.CreateBitCast(V, RetTy);
5360 }
5361
5362 return RValue::get(V);
5363 }
5364
5365 // Some target-specific builtins can have aggregate return values, e.g.
5366 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
5367 // ReturnValue to be non-null, so that the target-specific emission code can
5368 // always just emit into it.
5369 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
5370 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
5371 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
5372 ReturnValue = ReturnValueSlot(DestPtr, false);
5373 }
5374
5375 // Now see if we can emit a target-specific builtin.
5376 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
5377 switch (EvalKind) {
5378 case TEK_Scalar:
5379 return RValue::get(V);
5380 case TEK_Aggregate:
5381 return RValue::getAggregate(ReturnValue.getValue(),
5382 ReturnValue.isVolatile());
5383 case TEK_Complex:
5384 llvm_unreachable("No current target builtin returns complex");
5385 }
5386 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
5387 }
5388
5389 ErrorUnsupported(E, "builtin function");
5390
5391 // Unknown builtin, for now just dump it out and return undef.
5392 return GetUndefRValue(E->getType());
5393}
5394
5395 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
5396 unsigned BuiltinID, const CallExpr *E,
5397 ReturnValueSlot ReturnValue,
5398 llvm::Triple::ArchType Arch) {
5399 switch (Arch) {
5400 case llvm::Triple::arm:
5401 case llvm::Triple::armeb:
5402 case llvm::Triple::thumb:
5403 case llvm::Triple::thumbeb:
5404 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
5405 case llvm::Triple::aarch64:
5406 case llvm::Triple::aarch64_32:
5407 case llvm::Triple::aarch64_be:
5408 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
5409 case llvm::Triple::bpfeb:
5410 case llvm::Triple::bpfel:
5411 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
5412 case llvm::Triple::x86:
5413 case llvm::Triple::x86_64:
5414 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
5415 case llvm::Triple::ppc:
5416 case llvm::Triple::ppcle:
5417 case llvm::Triple::ppc64:
5418 case llvm::Triple::ppc64le:
5419 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
5420 case llvm::Triple::r600:
5421 case llvm::Triple::amdgcn:
5422 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
5423 case llvm::Triple::systemz:
5424 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
5425 case llvm::Triple::nvptx:
5426 case llvm::Triple::nvptx64:
5427 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
5428 case llvm::Triple::wasm32:
5429 case llvm::Triple::wasm64:
5430 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
5431 case llvm::Triple::hexagon:
5432 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
5433 case llvm::Triple::riscv32:
5434 case llvm::Triple::riscv64:
5435 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
5436 default:
5437 return nullptr;
5438 }
5439}
5440
5441 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
5442 const CallExpr *E,
5443 ReturnValueSlot ReturnValue) {
5444 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
5445 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
5446 return EmitTargetArchBuiltinExpr(
5447 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
5448 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
5449 }
5450
5451 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
5452 getTarget().getTriple().getArch());
5453}
5454
5455static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
5456 NeonTypeFlags TypeFlags,
5457 bool HasLegalHalfType = true,
5458 bool V1Ty = false,
5459 bool AllowBFloatArgsAndRet = true) {
5460 int IsQuad = TypeFlags.isQuad();
5461 switch (TypeFlags.getEltType()) {
5462 case NeonTypeFlags::Int8:
5463 case NeonTypeFlags::Poly8:
5464 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
5465 case NeonTypeFlags::Int16:
5466 case NeonTypeFlags::Poly16:
5467 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
5468 case NeonTypeFlags::BFloat16:
5469 if (AllowBFloatArgsAndRet)
5470 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
5471 else
5472 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
5473 case NeonTypeFlags::Float16:
5474 if (HasLegalHalfType)
5475 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
5476 else
5477 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
5478 case NeonTypeFlags::Int32:
5479 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
5480 case NeonTypeFlags::Int64:
5481 case NeonTypeFlags::Poly64:
5482 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
5483 case NeonTypeFlags::Poly128:
5484 // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
5485 // much of the i128/f128 API is missing, so we use v16i8 to represent
5486 // poly128 and rely on pattern matching.
5487 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
5488 case NeonTypeFlags::Float32:
5489 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
5490 case NeonTypeFlags::Float64:
5491 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
5492 }
5493 llvm_unreachable("Unknown vector element type!");
5494}
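// Example: an element type of Int8 yields <8 x i8> for the 64-bit
// (non-quad) form and <16 x i8> when the quad bit is set (8 << 1 lanes);
// passing V1Ty forces a single-element vector for the scalar intrinsic
// forms.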
5495
5496static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
5497 NeonTypeFlags IntTypeFlags) {
5498 int IsQuad = IntTypeFlags.isQuad();
5499 switch (IntTypeFlags.getEltType()) {
5500 case NeonTypeFlags::Int16:
5501 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
5502 case NeonTypeFlags::Int32:
5503 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
5504 case NeonTypeFlags::Int64:
5505 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
5506 default:
5507 llvm_unreachable("Type can't be converted to floating-point!");
5508 }
5509}
5510
5511 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
5512 const ElementCount &Count) {
5513 Value *SV = llvm::ConstantVector::getSplat(Count, C);
5514 return Builder.CreateShuffleVector(V, V, SV, "lane");
5515}
5516
5517 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
5518 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
5519 return EmitNeonSplat(V, C, EC);
5520}
5521
5522 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
5523 const char *name,
5524 unsigned shift, bool rightshift) {
5525 unsigned j = 0;
5526 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
5527 ai != ae; ++ai, ++j) {
5528 if (F->isConstrainedFPIntrinsic())
5529 if (ai->getType()->isMetadataTy())
5530 continue;
5531 if (shift > 0 && shift == j)
5532 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
5533 else
5534 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
5535 }
5536
5537 if (F->isConstrainedFPIntrinsic())
5538 return Builder.CreateConstrainedFPCall(F, Ops, name);
5539 else
5540 return Builder.CreateCall(F, Ops, name);
5541}
5542
5543 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
5544 bool neg) {
5545 int SV = cast<ConstantInt>(V)->getSExtValue();
5546 return ConstantInt::get(Ty, neg ? -SV : SV);
5547}
5548
5549// Right-shift a vector by a constant.
5550 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
5551 llvm::Type *Ty, bool usgn,
5552 const char *name) {
5553 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
5554
5555 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
5556 int EltSize = VTy->getScalarSizeInBits();
5557
5558 Vec = Builder.CreateBitCast(Vec, Ty);
5559
5560 // lshr/ashr are undefined when the shift amount is equal to the vector
5561 // element size.
5562 if (ShiftAmt == EltSize) {
5563 if (usgn) {
5564 // Right-shifting an unsigned value by its size yields 0.
5565 return llvm::ConstantAggregateZero::get(VTy);
5566 } else {
5567 // Right-shifting a signed value by its size is equivalent
5568 // to a shift of size-1.
5569 --ShiftAmt;
5570 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
5571 }
5572 }
5573
5574 Shift = EmitNeonShiftVector(Shift, Ty, false);
5575 if (usgn)
5576 return Builder.CreateLShr(Vec, Shift, name);
5577 else
5578 return Builder.CreateAShr(Vec, Shift, name);
5579}
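// Example of the clamp above: a signed right shift of <2 x i32> by 32
// cannot be emitted as an ashr by 32 (the result would be poison), so the
// amount is reduced to 31, which yields the same all-sign-bits value; the
// unsigned variant shifted by the full element size folds directly to
// zeroinitializer.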
5580
5581 enum {
5582 AddRetType = (1 << 0),
5583 Add1ArgType = (1 << 1),
5584 Add2ArgTypes = (1 << 2),
5585
5586 VectorizeRetType = (1 << 3),
5587 VectorizeArgTypes = (1 << 4),
5588
5589 InventFloatType = (1 << 5),
5590 UnsignedAlts = (1 << 6),
5591
5592 Use64BitVectors = (1 << 7),
5593 Use128BitVectors = (1 << 8),
5594
5595 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
5596 VectorRet = AddRetType | VectorizeRetType,
5597 VectorRetGetArgs01 =
5598 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
5599 FpCmpzModifiers =
5600 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
5601 };
5602
5603namespace {
5604struct ARMVectorIntrinsicInfo {
5605 const char *NameHint;
5606 unsigned BuiltinID;
5607 unsigned LLVMIntrinsic;
5608 unsigned AltLLVMIntrinsic;
5609 uint64_t TypeModifier;
5610
5611 bool operator<(unsigned RHSBuiltinID) const {
5612 return BuiltinID < RHSBuiltinID;
5613 }
5614 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
5615 return BuiltinID < TE.BuiltinID;
5616 }
5617};
5618} // end anonymous namespace
5619
5620#define NEONMAP0(NameBase) \
5621 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
5622
5623#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
5624 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
5625 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
5626
5627#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
5628 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
5629 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
5630 TypeModifier }
5631
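// For example, the vabd_v entry in the map below,
//   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
// expands to
//   { "vabd_v", NEON::BI__builtin_neon_vabd_v,
//     Intrinsic::arm_neon_vabdu, Intrinsic::arm_neon_vabds,
//     Add1ArgType | UnsignedAlts }
// i.e. an unsigned/signed intrinsic pair, with UnsignedAlts selecting
// between them based on operand signedness at lookup time.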
5632static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
5633 NEONMAP1(__a32_vcvt_bf16_v, arm_neon_vcvtfp2bf, 0),
5634 NEONMAP0(splat_lane_v),
5635 NEONMAP0(splat_laneq_v),
5636 NEONMAP0(splatq_lane_v),
5637 NEONMAP0(splatq_laneq_v),
5638 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
5639 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
5640 NEONMAP1(vabs_v, arm_neon_vabs, 0),
5641 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
5642 NEONMAP0(vadd_v),
5643 NEONMAP0(vaddhn_v),
5644 NEONMAP0(vaddq_v),
5645 NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
5646 NEONMAP1(vaeseq_v, arm_neon_aese, 0),
5647 NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
5648 NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
5649 NEONMAP1(vbfdot_v, arm_neon_bfdot, 0),
5650 NEONMAP1(vbfdotq_v, arm_neon_bfdot, 0),
5651 NEONMAP1(vbfmlalbq_v, arm_neon_bfmlalb, 0),
5652 NEONMAP1(vbfmlaltq_v, arm_neon_bfmlalt, 0),
5653 NEONMAP1(vbfmmlaq_v, arm_neon_bfmmla, 0),
5654 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
5655 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
5656 NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
5657 NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
5658 NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
5659 NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
5660 NEONMAP1(vcage_v, arm_neon_vacge, 0),
5661 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
5662 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
5663 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
5664 NEONMAP1(vcale_v, arm_neon_vacge, 0),
5665 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
5666 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
5667 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
5668 NEONMAP0(vceqz_v),
5669 NEONMAP0(vceqzq_v),
5670 NEONMAP0(vcgez_v),
5671 NEONMAP0(vcgezq_v),
5672 NEONMAP0(vcgtz_v),
5673 NEONMAP0(vcgtzq_v),
5674 NEONMAP0(vclez_v),
5675 NEONMAP0(vclezq_v),
5676 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
5677 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
5678 NEONMAP0(vcltz_v),
5679 NEONMAP0(vcltzq_v),
5680 NEONMAP1(vclz_v, ctlz, Add1ArgType),
5681 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
5682 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
5683 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
5684 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
5685 NEONMAP0(vcvt_f16_v),
5686 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
5687 NEONMAP0(vcvt_f32_v),
5688 NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
5689 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
5690 NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
5691 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
5692 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
5693 NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
5694 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
5695 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
5696 NEONMAP0(vcvt_s16_v),
5697 NEONMAP0(vcvt_s32_v),
5698 NEONMAP0(vcvt_s64_v),
5699 NEONMAP0(vcvt_u16_v),
5700 NEONMAP0(vcvt_u32_v),
5701 NEONMAP0(vcvt_u64_v),
5702 NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
5703 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
5704 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
5705 NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0),
5706 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
5707 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
5708 NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
5709 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
5710 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
5711 NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
5712 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
5713 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
5714 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
5715 NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
5716 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
5717 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
5718 NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
5719 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
5720 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
5721 NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
5722 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
5723 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
5724 NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
5725 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
5726 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
5727 NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
5728 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
5729 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
5730 NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
5731 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
5732 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
5733 NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
5734 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
5735 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
5736 NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
5737 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
5738 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
5739 NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
5740 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
5741 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
5742 NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
5743 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
5744 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
5745 NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
5746 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
5747 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
5748 NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
5749 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
5750 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
5751 NEONMAP0(vcvtq_f16_v),
5752 NEONMAP0(vcvtq_f32_v),
5753 NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
5754 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
5755 NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
5756 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
5757 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
5758 NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
5759 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
5760 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
5761 NEONMAP0(vcvtq_s16_v),
5762 NEONMAP0(vcvtq_s32_v),
5763 NEONMAP0(vcvtq_s64_v),
5764 NEONMAP0(vcvtq_u16_v),
5765 NEONMAP0(vcvtq_u32_v),
5766 NEONMAP0(vcvtq_u64_v),
5767 NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0),
5768 NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0),
5769 NEONMAP0(vext_v),
5770 NEONMAP0(vextq_v),
5771 NEONMAP0(vfma_v),
5772 NEONMAP0(vfmaq_v),
5773 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
5774 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
5775 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
5776 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
5777 NEONMAP0(vld1_dup_v),
5778 NEONMAP1(vld1_v, arm_neon_vld1, 0),
5779 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
5780 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
5781 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
5782 NEONMAP0(vld1q_dup_v),
5783 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
5784 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
5785 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
5786 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
5787 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
5788 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
5789 NEONMAP1(vld2_v, arm_neon_vld2, 0),
5790 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
5791 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
5792 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
5793 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
5794 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
5795 NEONMAP1(vld3_v, arm_neon_vld3, 0),
5796 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
5797 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
5798 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
5799 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
5800 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
5801 NEONMAP1(vld4_v, arm_neon_vld4, 0),
5802 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
5803 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
5804 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
5805 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
5806 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
5807 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
5808 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
5809 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
5810 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
5811 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
5812 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
5813 NEONMAP2(vmmlaq_v, arm_neon_ummla, arm_neon_smmla, 0),
5814 NEONMAP0(vmovl_v),
5815 NEONMAP0(vmovn_v),
5816 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
5817 NEONMAP0(vmull_v),
5818 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
5819 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
5820 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
5821 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
5822 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
5823 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
5824 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
5825 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
5826 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
5827 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
5828 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
5829 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
5830 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
5831 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
5832 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
5833 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
5834 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
5835 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
5836 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
5837 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
5838 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
5839 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
5840 NEONMAP1(vqrdmlah_v, arm_neon_vqrdmlah, Add1ArgType),
5841 NEONMAP1(vqrdmlahq_v, arm_neon_vqrdmlah, Add1ArgType),
5842 NEONMAP1(vqrdmlsh_v, arm_neon_vqrdmlsh, Add1ArgType),
5843 NEONMAP1(vqrdmlshq_v, arm_neon_vqrdmlsh, Add1ArgType),
5844 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
5845 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
5846 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
5847 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
5848 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
5849 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
5850 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
5851 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
5852 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
5853 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
5854 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
5855 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
5856 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
5857 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
5858 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
5859 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
5860 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
5861 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
5862 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
5863 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
5864 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
5865 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
5866 NEONMAP0(vrndi_v),
5867 NEONMAP0(vrndiq_v),
5868 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
5869 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
5870 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
5871 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
5872 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
5873 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
5874 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
5875 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
5876 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
5877 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
5878 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
5879 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
5880 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
5881 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
5882 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
5883 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
5884 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
5885 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
5886 NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
5887 NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
5888 NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
5889 NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
5890 NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
5891 NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
5892 NEONMAP0(vshl_n_v),
5893 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
5894 NEONMAP0(vshll_n_v),
5895 NEONMAP0(vshlq_n_v),
5896 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
5897 NEONMAP0(vshr_n_v),
5898 NEONMAP0(vshrn_n_v),
5899 NEONMAP0(vshrq_n_v),
5900 NEONMAP1(vst1_v, arm_neon_vst1, 0),
5901 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
5902 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
5903 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
5904 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
5905 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
5906 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
5907 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
5908 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
5909 NEONMAP1(vst2_v, arm_neon_vst2, 0),
5910 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
5911 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
5912 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
5913 NEONMAP1(vst3_v, arm_neon_vst3, 0),
5914 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
5915 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
5916 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
5917 NEONMAP1(vst4_v, arm_neon_vst4, 0),
5918 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
5919 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
5920 NEONMAP0(vsubhn_v),
5921 NEONMAP0(vtrn_v),
5922 NEONMAP0(vtrnq_v),
5923 NEONMAP0(vtst_v),
5924 NEONMAP0(vtstq_v),
5925 NEONMAP1(vusdot_v, arm_neon_usdot, 0),
5926 NEONMAP1(vusdotq_v, arm_neon_usdot, 0),
5927 NEONMAP1(vusmmlaq_v, arm_neon_usmmla, 0),
5928 NEONMAP0(vuzp_v),
5929 NEONMAP0(vuzpq_v),
5930 NEONMAP0(vzip_v),
5931 NEONMAP0(vzipq_v)
5932};
5933
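// The AArch64 table below mirrors the ARM one above but maps the builtins to
// aarch64_neon_* / aarch64_crypto_* intrinsics. Like every map handed to
// findARMVectorIntrinsicInMap, it must stay sorted by builtin ID so it can be
// binary-searched.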
5934static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
5935 NEONMAP1(__a64_vcvtq_low_bf16_v, aarch64_neon_bfcvtn, 0),
5936 NEONMAP0(splat_lane_v),
5937 NEONMAP0(splat_laneq_v),
5938 NEONMAP0(splatq_lane_v),
5939 NEONMAP0(splatq_laneq_v),
5940 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
5941 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
5942 NEONMAP0(vadd_v),
5943 NEONMAP0(vaddhn_v),
5944 NEONMAP0(vaddq_p128),
5945 NEONMAP0(vaddq_v),
5946 NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
5947 NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
5948 NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
5949 NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
5950 NEONMAP2(vbcaxq_v, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
5951 NEONMAP1(vbfdot_v, aarch64_neon_bfdot, 0),
5952 NEONMAP1(vbfdotq_v, aarch64_neon_bfdot, 0),
5953 NEONMAP1(vbfmlalbq_v, aarch64_neon_bfmlalb, 0),
5954 NEONMAP1(vbfmlaltq_v, aarch64_neon_bfmlalt, 0),
5955 NEONMAP1(vbfmmlaq_v, aarch64_neon_bfmmla, 0),
5956 NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
5957 NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
5958 NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
5959 NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
5960 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
5961 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
5962 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
5963 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
5964 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
5965 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
5966 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
5967 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
5968 NEONMAP0(vceqz_v),
5969 NEONMAP0(vceqzq_v),
5970 NEONMAP0(vcgez_v),
5971 NEONMAP0(vcgezq_v),
5972 NEONMAP0(vcgtz_v),
5973 NEONMAP0(vcgtzq_v),
5974 NEONMAP0(vclez_v),
5975 NEONMAP0(vclezq_v),
5976 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
5977 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
5978 NEONMAP0(vcltz_v),
5979 NEONMAP0(vcltzq_v),
5980 NEONMAP1(vclz_v, ctlz, Add1ArgType),
5981 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
5982 NEONMAP1(vcmla_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType),
5983 NEONMAP1(vcmla_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType),
5984 NEONMAP1(vcmla_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType),
5985 NEONMAP1(vcmla_v, aarch64_neon_vcmla_rot0, Add1ArgType),
5986 NEONMAP1(vcmlaq_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType),
5987 NEONMAP1(vcmlaq_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType),
5988 NEONMAP1(vcmlaq_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType),
5989 NEONMAP1(vcmlaq_v, aarch64_neon_vcmla_rot0, Add1ArgType),
5990 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
5991 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
5992 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
5993 NEONMAP0(vcvt_f16_v),
5994 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
5995 NEONMAP0(vcvt_f32_v),
5996 NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
5997 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
5998 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
5999 NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
6000 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6001 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6002 NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
6003 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6004 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6005 NEONMAP0(vcvtq_f16_v),
6006 NEONMAP0(vcvtq_f32_v),
6007 NEONMAP1(vcvtq_high_bf16_v, aarch64_neon_bfcvtn2, 0),
6008 NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6009 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6010 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6011 NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
6012 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6013 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6014 NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
6015 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6016 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6017 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
6018 NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
6019 NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
6020 NEONMAP2(veor3q_v, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6021 NEONMAP0(vext_v),
6022 NEONMAP0(vextq_v),
6023 NEONMAP0(vfma_v),
6024 NEONMAP0(vfmaq_v),
6025 NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0),
6026 NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0),
6027 NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0),
6028 NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0),
6029 NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0),
6030 NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0),
6031 NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0),
6032 NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0),
6033 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6034 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6035 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6036 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6037 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
6038 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
6039 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
6040 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
6041 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
6042 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
6043 NEONMAP2(vmmlaq_v, aarch64_neon_ummla, aarch64_neon_smmla, 0),
6044 NEONMAP0(vmovl_v),
6045 NEONMAP0(vmovn_v),
6046 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
6047 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
6048 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
6049 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6050 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6051 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
6052 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
6053 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
6054 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6055 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6056 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
6057 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
6058 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
6059 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6060 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
6061 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
6062 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6063 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
6064 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
6065 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
6066 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
6067 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
6068 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
6069 NEONMAP1(vqrdmlah_v, aarch64_neon_sqrdmlah, Add1ArgType),
6070 NEONMAP1(vqrdmlahq_v, aarch64_neon_sqrdmlah, Add1ArgType),
6071 NEONMAP1(vqrdmlsh_v, aarch64_neon_sqrdmlsh, Add1ArgType),
6072 NEONMAP1(vqrdmlshq_v, aarch64_neon_sqrdmlsh, Add1ArgType),
6073 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6074 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6075 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
6076 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6077 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6078 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
6079 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6080 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6081 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
6082 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
 6084 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
6084 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
6085 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
6086 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
6087 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6088 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6089 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
6090 NEONMAP1(vrax1q_v, aarch64_crypto_rax1, 0),
6091 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6092 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6093 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
6094 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
6095 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6096 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6097 NEONMAP1(vrnd32x_v, aarch64_neon_frint32x, Add1ArgType),
6098 NEONMAP1(vrnd32xq_v, aarch64_neon_frint32x, Add1ArgType),
6099 NEONMAP1(vrnd32z_v, aarch64_neon_frint32z, Add1ArgType),
6100 NEONMAP1(vrnd32zq_v, aarch64_neon_frint32z, Add1ArgType),
6101 NEONMAP1(vrnd64x_v, aarch64_neon_frint64x, Add1ArgType),
6102 NEONMAP1(vrnd64xq_v, aarch64_neon_frint64x, Add1ArgType),
6103 NEONMAP1(vrnd64z_v, aarch64_neon_frint64z, Add1ArgType),
6104 NEONMAP1(vrnd64zq_v, aarch64_neon_frint64z, Add1ArgType),
6105 NEONMAP0(vrndi_v),
6106 NEONMAP0(vrndiq_v),
6107 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
6108 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
6109 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
6110 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
6111 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
6112 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
6113 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
6114 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
6115 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
6116 NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
6117 NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
6118 NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
6119 NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
6120 NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
6121 NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
6122 NEONMAP1(vsha512h2q_v, aarch64_crypto_sha512h2, 0),
6123 NEONMAP1(vsha512hq_v, aarch64_crypto_sha512h, 0),
6124 NEONMAP1(vsha512su0q_v, aarch64_crypto_sha512su0, 0),
6125 NEONMAP1(vsha512su1q_v, aarch64_crypto_sha512su1, 0),
6126 NEONMAP0(vshl_n_v),
6127 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
6128 NEONMAP0(vshll_n_v),
6129 NEONMAP0(vshlq_n_v),
6130 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
6131 NEONMAP0(vshr_n_v),
6132 NEONMAP0(vshrn_n_v),
6133 NEONMAP0(vshrq_n_v),
6134 NEONMAP1(vsm3partw1q_v, aarch64_crypto_sm3partw1, 0),
6135 NEONMAP1(vsm3partw2q_v, aarch64_crypto_sm3partw2, 0),
6136 NEONMAP1(vsm3ss1q_v, aarch64_crypto_sm3ss1, 0),
6137 NEONMAP1(vsm3tt1aq_v, aarch64_crypto_sm3tt1a, 0),
6138 NEONMAP1(vsm3tt1bq_v, aarch64_crypto_sm3tt1b, 0),
6139 NEONMAP1(vsm3tt2aq_v, aarch64_crypto_sm3tt2a, 0),
6140 NEONMAP1(vsm3tt2bq_v, aarch64_crypto_sm3tt2b, 0),
6141 NEONMAP1(vsm4ekeyq_v, aarch64_crypto_sm4ekey, 0),
6142 NEONMAP1(vsm4eq_v, aarch64_crypto_sm4e, 0),
6143 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
6144 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
6145 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
6146 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
6147 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
6148 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
6149 NEONMAP0(vsubhn_v),
6150 NEONMAP0(vtst_v),
6151 NEONMAP0(vtstq_v),
6152 NEONMAP1(vusdot_v, aarch64_neon_usdot, 0),
6153 NEONMAP1(vusdotq_v, aarch64_neon_usdot, 0),
6154 NEONMAP1(vusmmlaq_v, aarch64_neon_usmmla, 0),
6155 NEONMAP1(vxarq_v, aarch64_crypto_xar, 0),
6156};
6157
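// Scalar (SISD) builtins. Most are implemented with the corresponding vector
// intrinsic: the Vectorize1ArgType / VectorRet / Use64BitVectors /
// Use128BitVectors modifiers tell the SISD emission path how to wrap the
// scalar operands into vectors and unwrap the scalar result again.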
6158static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
6159 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
6160 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
6161 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
6162 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
6163 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
6164 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
6165 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
6166 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
6167 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
6168 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6169 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
6170 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
6171 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
6172 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
6173 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6174 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6175 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
6176 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
6177 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
6178 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
6179 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
6180 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
6181 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
6182 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
6183 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6184 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6185 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6186 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6187 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6188 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6189 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6190 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6191 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6192 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6193 NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
6194 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6195 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6196 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6197 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6198 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6199 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6200 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6201 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6202 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6203 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6204 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6205 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6206 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6207 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6208 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6209 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6210 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6211 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6212 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
6213 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6214 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6215 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6216 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6217 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
6218 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
6219 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6220 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6221 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
6222 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
6223 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6224 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6225 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6226 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
6227 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
6228 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
6229 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
6230 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
6231 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
6232 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
6233 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
6234 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
6235 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
6236 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6237 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6238 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6239 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6240 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6241 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6242 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6243 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6244 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
6245 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
6246 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
6247 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
6248 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
6249 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
6250 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
6251 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
6252 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
6253 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
6254 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
6255 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
6256 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
6257 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
6258 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
6259 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
6260 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
6261 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
6262 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
6263 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
6264 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
6265 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
6266 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
6267 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
6268 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
6269 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
6270 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
6271 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
6272 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
6273 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
6274 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
6275 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
6276 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6277 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
6278 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6279 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
6280 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
6281 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
6282 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
6283 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
6284 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
6285 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
6286 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
6287 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
6288 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
6289 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
6290 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
6291 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
6292 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
6293 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
6294 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
6295 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
6296 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
6297 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
6298 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6299 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6300 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6301 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6302 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
6303 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
6304 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6305 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6306 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6307 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6308 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
6309 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
6310 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
6311 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
6312 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
6313 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
6314 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
6315 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
6316 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
6317 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
6318 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
6319 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
6320 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
6321 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
6322 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
6323 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
6324 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
6325 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
6326 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
6327 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
6328 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
6329 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
6330 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
6331 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
6332 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
6333 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
6334 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
6335 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
6336 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
6337 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
6338 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
6339 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
6340 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
6341 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
6342 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
6343 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
6344 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
6345 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
6346 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
6347 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
6348 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
6349 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
6350 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
6351 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
6352 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
6353 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
6354 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
6355 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
6356 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
6357 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
6358 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
6359 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
 6360 // FP16 scalar intrinsics go here.
6361 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
6362 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6363 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6364 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6365 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6366 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6367 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6368 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6369 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6370 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6371 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6372 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6373 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6374 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6375 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6376 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6377 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6378 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6379 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6380 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6381 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6382 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6383 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6384 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6385 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6386 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6387 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6388 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6389 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6390 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
6391 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
6392 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
6393 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
6394 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
6395};
6396
6397#undef NEONMAP0
6398#undef NEONMAP1
6399#undef NEONMAP2
6400
6401#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6402 { \
6403 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
6404 TypeModifier \
6405 }
6406
6407#define SVEMAP2(NameBase, TypeModifier) \
6408 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
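// SVEMAP1 binds a builtin directly to an LLVM intrinsic; SVEMAP2 leaves the
// intrinsic field zero for builtins that are lowered entirely by custom
// codegen.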
6409static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
6410#define GET_SVE_LLVM_INTRINSIC_MAP
6411#include "clang/Basic/arm_sve_builtin_cg.inc"
6412#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
6413#undef GET_SVE_LLVM_INTRINSIC_MAP
6414};
6415
6416#undef SVEMAP1
6417#undef SVEMAP2
6418
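// Each intrinsic table is binary-searched, so it must be sorted by builtin
// ID; the *ProvenSorted flags below cache the one-time (debug-build-only)
// sortedness check done in findARMVectorIntrinsicInMap.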
 6419static bool NEONSIMDIntrinsicsProvenSorted = false;
 6420
 6421static bool AArch64SIMDIntrinsicsProvenSorted = false;
 6422static bool AArch64SISDIntrinsicsProvenSorted = false;
 6423static bool AArch64SVEIntrinsicsProvenSorted = false;
 6424
6425static const ARMVectorIntrinsicInfo *
 6426findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
 6427 unsigned BuiltinID, bool &MapProvenSorted) {
6428
6429#ifndef NDEBUG
6430 if (!MapProvenSorted) {
6431 assert(llvm::is_sorted(IntrinsicMap));
6432 MapProvenSorted = true;
6433 }
6434#endif
6435
6436 const ARMVectorIntrinsicInfo *Builtin =
6437 llvm::lower_bound(IntrinsicMap, BuiltinID);
6438
6439 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
6440 return Builtin;
6441
6442 return nullptr;
6443}
6444
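// Resolve the LLVM intrinsic's overloaded types from the TypeModifier bits:
// AddRetType contributes the call's return type, Add1ArgType/Add2ArgTypes
// contribute the argument type once or twice, and the Vectorize* and
// Use64BitVectors/Use128BitVectors modifiers re-wrap scalar types as fixed
// vectors of the requested width.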
6445Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
6446 unsigned Modifier,
6447 llvm::Type *ArgType,
6448 const CallExpr *E) {
6449 int VectorSize = 0;
6450 if (Modifier & Use64BitVectors)
6451 VectorSize = 64;
6452 else if (Modifier & Use128BitVectors)
6453 VectorSize = 128;
6454
6455 // Return type.
 6456 SmallVector<llvm::Type *, 3> Tys;
 6457 if (Modifier & AddRetType) {
6458 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6459 if (Modifier & VectorizeRetType)
6460 Ty = llvm::FixedVectorType::get(
6461 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
6462
6463 Tys.push_back(Ty);
6464 }
6465
6466 // Arguments.
6467 if (Modifier & VectorizeArgTypes) {
6468 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
6469 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
6470 }
6471
6472 if (Modifier & (Add1ArgType | Add2ArgTypes))
6473 Tys.push_back(ArgType);
6474
6475 if (Modifier & Add2ArgTypes)
6476 Tys.push_back(ArgType);
6477
6478 if (Modifier & InventFloatType)
6479 Tys.push_back(FloatTy);
6480
6481 return CGM.getIntrinsic(IntrinsicID, Tys);
6482}
6483
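// Emit a scalar (SISD) builtin via a vector intrinsic: scalar operands are
// inserted into lane 0 of an undef vector below, and the scalar result is
// extracted from lane 0 again if the intrinsic returns a wider vector.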
 6484static Value *EmitCommonNeonSISDBuiltinExpr(
 6485 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
6486 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
6487 unsigned BuiltinID = SISDInfo.BuiltinID;
6488 unsigned int Int = SISDInfo.LLVMIntrinsic;
6489 unsigned Modifier = SISDInfo.TypeModifier;
6490 const char *s = SISDInfo.NameHint;
6491
6492 switch (BuiltinID) {
6493 case NEON::BI__builtin_neon_vcled_s64:
6494 case NEON::BI__builtin_neon_vcled_u64:
6495 case NEON::BI__builtin_neon_vcles_f32:
6496 case NEON::BI__builtin_neon_vcled_f64:
6497 case NEON::BI__builtin_neon_vcltd_s64:
6498 case NEON::BI__builtin_neon_vcltd_u64:
6499 case NEON::BI__builtin_neon_vclts_f32:
6500 case NEON::BI__builtin_neon_vcltd_f64:
6501 case NEON::BI__builtin_neon_vcales_f32:
6502 case NEON::BI__builtin_neon_vcaled_f64:
6503 case NEON::BI__builtin_neon_vcalts_f32:
6504 case NEON::BI__builtin_neon_vcaltd_f64:
 6505 // Only one direction of comparisons actually exists; cmle is actually a cmge
 6506 // with swapped operands. The table gives us the right intrinsic, but we
 6507 // still need to do the swap.
6508 std::swap(Ops[0], Ops[1]);
6509 break;
6510 }
6511
6512 assert(Int && "Generic code assumes a valid intrinsic");
6513
6514 // Determine the type(s) of this overloaded AArch64 intrinsic.
6515 const Expr *Arg = E->getArg(0);
6516 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
6517 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
6518
6519 int j = 0;
6520 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
6521 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6522 ai != ae; ++ai, ++j) {
6523 llvm::Type *ArgTy = ai->getType();
6524 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
6525 ArgTy->getPrimitiveSizeInBits())
6526 continue;
6527
6528 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
6529 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
6530 // it before inserting.
6531 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
6532 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
6533 Ops[j] =
6534 CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
6535 }
6536
6537 Value *Result = CGF.EmitNeonCall(F, Ops, s);
6538 llvm::Type *ResultType = CGF.ConvertType(E->getType());
6539 if (ResultType->getPrimitiveSizeInBits().getFixedSize() <
6540 Result->getType()->getPrimitiveSizeInBits().getFixedSize())
6541 return CGF.Builder.CreateExtractElement(Result, C0);
6542
6543 return CGF.Builder.CreateBitCast(Result, ResultType, s);
6544}
6545
 6546Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
 6547 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
6548 const char *NameHint, unsigned Modifier, const CallExpr *E,
6549 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
6550 llvm::Triple::ArchType Arch) {
6551 // Get the last argument, which specifies the vector type.
6552 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
6553 Optional<llvm::APSInt> NeonTypeConst =
 6554 Arg->getIntegerConstantExpr(getContext());
 6555 if (!NeonTypeConst)
6556 return nullptr;
6557
6558 // Determine the type of this overloaded NEON intrinsic.
6559 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
6560 bool Usgn = Type.isUnsigned();
6561 bool Quad = Type.isQuad();
6562 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
6563 const bool AllowBFloatArgsAndRet =
 6564 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
 6565
6566 llvm::FixedVectorType *VTy =
6567 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
6568 llvm::Type *Ty = VTy;
6569 if (!Ty)
6570 return nullptr;
6571
6572 auto getAlignmentValue32 = [&](Address addr) -> Value* {
6573 return Builder.getInt32(addr.getAlignment().getQuantity());
6574 };
6575
6576 unsigned Int = LLVMIntrinsic;
6577 if ((Modifier & UnsignedAlts) && !Usgn)
6578 Int = AltLLVMIntrinsic;
6579
6580 switch (BuiltinID) {
6581 default: break;
6582 case NEON::BI__builtin_neon_splat_lane_v:
6583 case NEON::BI__builtin_neon_splat_laneq_v:
6584 case NEON::BI__builtin_neon_splatq_lane_v:
6585 case NEON::BI__builtin_neon_splatq_laneq_v: {
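    // splatq_lane_v reads a lane of a 64-bit vector but produces a 128-bit
    // result, so the element count doubles; splat_laneq_v is the reverse and
    // halves it. EmitNeonSplat then performs the broadcast shuffle.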
6586 auto NumElements = VTy->getElementCount();
6587 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
6588 NumElements = NumElements * 2;
6589 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
6590 NumElements = NumElements.divideCoefficientBy(2);
6591
6592 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6593 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
6594 }
6595 case NEON::BI__builtin_neon_vpadd_v:
6596 case NEON::BI__builtin_neon_vpaddq_v:
6597 // We don't allow fp/int overloading of intrinsics.
6598 if (VTy->getElementType()->isFloatingPointTy() &&
6599 Int == Intrinsic::aarch64_neon_addp)
6600 Int = Intrinsic::aarch64_neon_faddp;
6601 break;
6602 case NEON::BI__builtin_neon_vabs_v:
6603 case NEON::BI__builtin_neon_vabsq_v:
6604 if (VTy->getElementType()->isFloatingPointTy())
6605 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
6606 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
6607 case NEON::BI__builtin_neon_vadd_v:
6608 case NEON::BI__builtin_neon_vaddq_v: {
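    // These builtins are only reached for the polynomial vadd variants;
    // polynomial addition is carry-less (addition in GF(2)), so it is
    // emitted as a bitwise XOR on the i8 representation.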
6609 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
6610 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6611 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6612 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
6613 return Builder.CreateBitCast(Ops[0], Ty);
6614 }
6615 case NEON::BI__builtin_neon_vaddhn_v: {
6616 llvm::FixedVectorType *SrcTy =
6617 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
6618
6619 // %sum = add <4 x i32> %lhs, %rhs
6620 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
6621 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
6622 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
6623
6624 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
6625 Constant *ShiftAmt =
6626 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
6627 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
6628
6629 // %res = trunc <4 x i32> %high to <4 x i16>
6630 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
6631 }
6632 case NEON::BI__builtin_neon_vcale_v:
6633 case NEON::BI__builtin_neon_vcaleq_v:
6634 case NEON::BI__builtin_neon_vcalt_v:
6635 case NEON::BI__builtin_neon_vcaltq_v:
6636 std::swap(Ops[0], Ops[1]);
6637 LLVM_FALLTHROUGH;
6638 case NEON::BI__builtin_neon_vcage_v:
6639 case NEON::BI__builtin_neon_vcageq_v:
6640 case NEON::BI__builtin_neon_vcagt_v:
6641 case NEON::BI__builtin_neon_vcagtq_v: {
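    // Absolute compares (facge/facgt) take floating-point operands but
    // produce an integer mask, so the intrinsic is overloaded on both the
    // integer result type and the matching float vector type built below.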
6642 llvm::Type *Ty;
6643 switch (VTy->getScalarSizeInBits()) {
6644 default: llvm_unreachable("unexpected type");
6645 case 32:
6646 Ty = FloatTy;
6647 break;
6648 case 64:
6649 Ty = DoubleTy;
6650 break;
6651 case 16:
6652 Ty = HalfTy;
6653 break;
6654 }
6655 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
6656 llvm::Type *Tys[] = { VTy, VecFlt };
6657 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
6658 return EmitNeonCall(F, Ops, NameHint);
6659 }
6660 case NEON::BI__builtin_neon_vceqz_v:
6661 case NEON::BI__builtin_neon_vceqzq_v:
6662 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6663 ICmpInst::ICMP_EQ, "vceqz");
6664 case NEON::BI__builtin_neon_vcgez_v:
6665 case NEON::BI__builtin_neon_vcgezq_v:
6666 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6667 ICmpInst::ICMP_SGE, "vcgez");
6668 case NEON::BI__builtin_neon_vclez_v:
6669 case NEON::BI__builtin_neon_vclezq_v:
6670 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6671 ICmpInst::ICMP_SLE, "vclez");
6672 case NEON::BI__builtin_neon_vcgtz_v:
6673 case NEON::BI__builtin_neon_vcgtzq_v:
6674 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6675 ICmpInst::ICMP_SGT, "vcgtz");
6676 case NEON::BI__builtin_neon_vcltz_v:
6677 case NEON::BI__builtin_neon_vcltzq_v:
6678 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6679 ICmpInst::ICMP_SLT, "vcltz");
6680 case NEON::BI__builtin_neon_vclz_v:
6681 case NEON::BI__builtin_neon_vclzq_v:
 6682 // We generate a target-independent intrinsic, which needs a second argument
 6683 // specifying whether or not clz of zero is undefined; on ARM it isn't.
6684 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
6685 break;
6686 case NEON::BI__builtin_neon_vcvt_f32_v:
6687 case NEON::BI__builtin_neon_vcvtq_f32_v:
6688 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6689 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
6690 HasLegalHalfType);
6691 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6692 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6693 case NEON::BI__builtin_neon_vcvt_f16_v:
6694 case NEON::BI__builtin_neon_vcvtq_f16_v:
6695 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6696 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
6697 HasLegalHalfType);
6698 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6699 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6700 case NEON::BI__builtin_neon_vcvt_n_f16_v:
6701 case NEON::BI__builtin_neon_vcvt_n_f32_v:
6702 case NEON::BI__builtin_neon_vcvt_n_f64_v:
6703 case NEON::BI__builtin_neon_vcvtq_n_f16_v:
6704 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
6705 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
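    // Fixed-point to floating-point conversion: pick the unsigned or signed
    // flavour from Usgn and overload on <float vector, int vector>; the _n_
    // immediate giving the number of fraction bits is already in Ops.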
6706 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
6707 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
6708 Function *F = CGM.getIntrinsic(Int, Tys);
6709 return EmitNeonCall(F, Ops, "vcvt_n");
6710 }
6711 case NEON::BI__builtin_neon_vcvt_n_s16_v:
6712 case NEON::BI__builtin_neon_vcvt_n_s32_v:
6713 case NEON::BI__builtin_neon_vcvt_n_u16_v:
6714 case NEON::BI__builtin_neon_vcvt_n_u32_v:
6715 case NEON::BI__builtin_neon_vcvt_n_s64_v:
6716 case NEON::BI__builtin_neon_vcvt_n_u64_v:
6717 case NEON::BI__builtin_neon_vcvtq_n_s16_v:
6718 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
6719 case NEON::BI__builtin_neon_vcvtq_n_u16_v:
6720 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
6721 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
6722 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
6723 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6724 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
6725 return EmitNeonCall(F, Ops, "vcvt_n");
6726 }
6727 case NEON::BI__builtin_neon_vcvt_s32_v:
6728 case NEON::BI__builtin_neon_vcvt_u32_v:
6729 case NEON::BI__builtin_neon_vcvt_s64_v:
6730 case NEON::BI__builtin_neon_vcvt_u64_v:
6731 case NEON::BI__builtin_neon_vcvt_s16_v:
6732 case NEON::BI__builtin_neon_vcvt_u16_v:
6733 case NEON::BI__builtin_neon_vcvtq_s32_v:
6734 case NEON::BI__builtin_neon_vcvtq_u32_v:
6735 case NEON::BI__builtin_neon_vcvtq_s64_v:
6736 case NEON::BI__builtin_neon_vcvtq_u64_v:
6737 case NEON::BI__builtin_neon_vcvtq_s16_v:
6738 case NEON::BI__builtin_neon_vcvtq_u16_v: {
6739 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6740 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
6741 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
6742 }
6743 case NEON::BI__builtin_neon_vcvta_s16_v:
6744 case NEON::BI__builtin_neon_vcvta_s32_v:
6745 case NEON::BI__builtin_neon_vcvta_s64_v:
6746 case NEON::BI__builtin_neon_vcvta_u16_v:
6747 case NEON::BI__builtin_neon_vcvta_u32_v:
6748 case NEON::BI__builtin_neon_vcvta_u64_v:
6749 case NEON::BI__builtin_neon_vcvtaq_s16_v:
6750 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6751 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6752 case NEON::BI__builtin_neon_vcvtaq_u16_v:
6753 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6754 case NEON::BI__builtin_neon_vcvtaq_u64_v:
6755 case NEON::BI__builtin_neon_vcvtn_s16_v:
6756 case NEON::BI__builtin_neon_vcvtn_s32_v:
6757 case NEON::BI__builtin_neon_vcvtn_s64_v:
6758 case NEON::BI__builtin_neon_vcvtn_u16_v:
6759 case NEON::BI__builtin_neon_vcvtn_u32_v:
6760 case NEON::BI__builtin_neon_vcvtn_u64_v:
6761 case NEON::BI__builtin_neon_vcvtnq_s16_v:
6762 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6763 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6764 case NEON::BI__builtin_neon_vcvtnq_u16_v:
6765 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6766 case NEON::BI__builtin_neon_vcvtnq_u64_v:
6767 case NEON::BI__builtin_neon_vcvtp_s16_v:
6768 case NEON::BI__builtin_neon_vcvtp_s32_v:
6769 case NEON::BI__builtin_neon_vcvtp_s64_v:
6770 case NEON::BI__builtin_neon_vcvtp_u16_v:
6771 case NEON::BI__builtin_neon_vcvtp_u32_v:
6772 case NEON::BI__builtin_neon_vcvtp_u64_v:
6773 case NEON::BI__builtin_neon_vcvtpq_s16_v:
6774 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6775 case NEON::BI__builtin_neon_vcvtpq_s64_v:
6776 case NEON::BI__builtin_neon_vcvtpq_u16_v:
6777 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6778 case NEON::BI__builtin_neon_vcvtpq_u64_v:
6779 case NEON::BI__builtin_neon_vcvtm_s16_v:
6780 case NEON::BI__builtin_neon_vcvtm_s32_v:
6781 case NEON::BI__builtin_neon_vcvtm_s64_v:
6782 case NEON::BI__builtin_neon_vcvtm_u16_v:
6783 case NEON::BI__builtin_neon_vcvtm_u32_v:
6784 case NEON::BI__builtin_neon_vcvtm_u64_v:
6785 case NEON::BI__builtin_neon_vcvtmq_s16_v:
6786 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6787 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6788 case NEON::BI__builtin_neon_vcvtmq_u16_v:
6789 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6790 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6791 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6792 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
6793 }
6794 case NEON::BI__builtin_neon_vcvtx_f32_v: {
6795 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
6796 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
6797
6798 }
6799 case NEON::BI__builtin_neon_vext_v:
6800 case NEON::BI__builtin_neon_vextq_v: {
6801 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
6802 SmallVector<int, 16> Indices;
6803 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6804 Indices.push_back(i+CV);
6805
6806 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6807 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6808 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
6809 }
6810 case NEON::BI__builtin_neon_vfma_v:
6811 case NEON::BI__builtin_neon_vfmaq_v: {
6812 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6813 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6814 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6815
6816 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
 6817 return emitCallMaybeConstrainedFPBuiltin(
 6818 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6819 {Ops[1], Ops[2], Ops[0]});
6820 }
6821 case NEON::BI__builtin_neon_vld1_v:
6822 case NEON::BI__builtin_neon_vld1q_v: {
6823 llvm::Type *Tys[] = {Ty, Int8PtrTy};
6824 Ops.push_back(getAlignmentValue32(PtrOp0));
6825 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
6826 }
6827 case NEON::BI__builtin_neon_vld1_x2_v:
6828 case NEON::BI__builtin_neon_vld1q_x2_v:
6829 case NEON::BI__builtin_neon_vld1_x3_v:
6830 case NEON::BI__builtin_neon_vld1q_x3_v:
6831 case NEON::BI__builtin_neon_vld1_x4_v:
6832 case NEON::BI__builtin_neon_vld1q_x4_v: {
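    // vld1xN returns an aggregate of N vectors; call the intrinsic with the
    // element pointer, then store the whole aggregate through the result
    // pointer in Ops[0].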
6833 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType());
6834 Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6835 llvm::Type *Tys[2] = { VTy, PTy };
6836 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
6837 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6838 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6839 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6840 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6841 }
6842 case NEON::BI__builtin_neon_vld2_v:
6843 case NEON::BI__builtin_neon_vld2q_v:
6844 case NEON::BI__builtin_neon_vld3_v:
6845 case NEON::BI__builtin_neon_vld3q_v:
6846 case NEON::BI__builtin_neon_vld4_v:
6847 case NEON::BI__builtin_neon_vld4q_v:
6848 case NEON::BI__builtin_neon_vld2_dup_v:
6849 case NEON::BI__builtin_neon_vld2q_dup_v:
6850 case NEON::BI__builtin_neon_vld3_dup_v:
6851 case NEON::BI__builtin_neon_vld3q_dup_v:
6852 case NEON::BI__builtin_neon_vld4_dup_v:
6853 case NEON::BI__builtin_neon_vld4q_dup_v: {
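    // Structured loads (vldN and vldN_dup) also return an aggregate of
    // vectors; on ARM the source alignment is passed explicitly as the last
    // intrinsic argument.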
6854 llvm::Type *Tys[] = {Ty, Int8PtrTy};
6855 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
6856 Value *Align = getAlignmentValue32(PtrOp1);
6857 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
6858 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6859 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6860 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6861 }
6862 case NEON::BI__builtin_neon_vld1_dup_v:
6863 case NEON::BI__builtin_neon_vld1q_dup_v: {
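    // Load a single element, insert it into lane 0 of an undef vector, and
    // splat it across all lanes with a shuffle.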
6864 Value *V = UndefValue::get(Ty);
6865 PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
6866 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
6867 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
6868 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
6869 return EmitNeonSplat(Ops[0], CI);
6870 }
6871 case NEON::BI__builtin_neon_vld2_lane_v:
6872 case NEON::BI__builtin_neon_vld2q_lane_v:
6873 case NEON::BI__builtin_neon_vld3_lane_v:
6874 case NEON::BI__builtin_neon_vld3q_lane_v:
6875 case NEON::BI__builtin_neon_vld4_lane_v:
6876 case NEON::BI__builtin_neon_vld4q_lane_v: {
6877 llvm::Type *Tys[] = {Ty, Int8PtrTy};
6878 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
6879 for (unsigned I = 2; I < Ops.size() - 1; ++I)
6880 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
6881 Ops.push_back(getAlignmentValue32(PtrOp1));
6882 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
6883 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6884 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6885 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6886 }
6887 case NEON::BI__builtin_neon_vmovl_v: {
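    // vmovl widens each lane: reinterpret the operand at the narrow element
    // type, then zero- or sign-extend into the full-width result.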
6888 llvm::FixedVectorType *DTy =
6889 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
6890 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
6891 if (Usgn)
6892 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
6893 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
6894 }
6895 case NEON::BI__builtin_neon_vmovn_v: {
6896 llvm::FixedVectorType *QTy =
6897 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
6898 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
6899 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
6900 }
6901 case NEON::BI__builtin_neon_vmull_v:
6902 // FIXME: the integer vmull operations could be emitted in terms of pure
6903 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
6904 // hoisting the exts outside loops. Until global ISel comes along that can
 6905 // see through such movement, this leads to bad CodeGen. So we need an
6906 // intrinsic for now.
6907 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
6908 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
6909 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6910 case NEON::BI__builtin_neon_vpadal_v:
6911 case NEON::BI__builtin_neon_vpadalq_v: {
6912 // The source operand type has twice as many elements of half the size.
6913 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
6914 llvm::Type *EltTy =
6915 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
6916 auto *NarrowTy =
6917 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
6918 llvm::Type *Tys[2] = { Ty, NarrowTy };
6919 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
6920 }
6921 case NEON::BI__builtin_neon_vpaddl_v:
6922 case NEON::BI__builtin_neon_vpaddlq_v: {
6923 // The source operand type has twice as many elements of half the size.
6924 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
6925 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
6926 auto *NarrowTy =
6927 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
6928 llvm::Type *Tys[2] = { Ty, NarrowTy };
6929 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
6930 }
6931 case NEON::BI__builtin_neon_vqdmlal_v:
6932 case NEON::BI__builtin_neon_vqdmlsl_v: {
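    // Emitted in two steps: a saturating doubling multiply-long of the two
    // multiplicands, then a saturating add (vqdmlal) or subtract (vqdmlsl)
    // with the accumulator in Ops[0].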
6933 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
6934 Ops[1] =
6935 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
6936 Ops.resize(2);
6937 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
6938 }
6939 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
6940 case NEON::BI__builtin_neon_vqdmulh_lane_v:
6941 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
6942 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
6943 auto *RTy = cast<llvm::FixedVectorType>(Ty);
6944 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
6945 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
6946 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
6947 RTy->getNumElements() * 2);
6948 llvm::Type *Tys[2] = {
6949 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6950 /*isQuad*/ false))};
6951 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
6952 }
6953 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
6954 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
6955 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
6956 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
6957 llvm::Type *Tys[2] = {
6958 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6959 /*isQuad*/ true))};
6960 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
6961 }
6962 case NEON::BI__builtin_neon_vqshl_n_v:
6963 case NEON::BI__builtin_neon_vqshlq_n_v:
6964 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
6965 1, false);
6966 case NEON::BI__builtin_neon_vqshlu_n_v:
6967 case NEON::BI__builtin_neon_vqshluq_n_v:
6968 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
6969 1, false);
6970 case NEON::BI__builtin_neon_vrecpe_v:
6971 case NEON::BI__builtin_neon_vrecpeq_v:
6972 case NEON::BI__builtin_neon_vrsqrte_v:
6973 case NEON::BI__builtin_neon_vrsqrteq_v:
6974 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
6975 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
6976 case NEON::BI__builtin_neon_vrndi_v:
6977 case NEON::BI__builtin_neon_vrndiq_v:
6978 Int = Builder.getIsFPConstrained()
6979 ? Intrinsic::experimental_constrained_nearbyint
6980 : Intrinsic::nearbyint;
6981 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
6982 case NEON::BI__builtin_neon_vrshr_n_v:
6983 case NEON::BI__builtin_neon_vrshrq_n_v:
6984 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
6985 1, true);
6986 case NEON::BI__builtin_neon_vsha512hq_v:
6987 case NEON::BI__builtin_neon_vsha512h2q_v:
6988 case NEON::BI__builtin_neon_vsha512su0q_v:
6989 case NEON::BI__builtin_neon_vsha512su1q_v: {
6990 Function *F = CGM.getIntrinsic(Int);
6991 return EmitNeonCall(F, Ops, "");
6992 }
6993 case NEON::BI__builtin_neon_vshl_n_v:
6994 case NEON::BI__builtin_neon_vshlq_n_v:
6995 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
6996 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
6997 "vshl_n");
6998 case NEON::BI__builtin_neon_vshll_n_v: {
6999 llvm::FixedVectorType *SrcTy =
7000 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
7001 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7002 if (Usgn)
7003 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
7004 else
7005 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
7006 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
7007 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
7008 }
7009 case NEON::BI__builtin_neon_vshrn_n_v: {
7010 llvm::FixedVectorType *SrcTy =
7011 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7012 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7013 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
7014 if (Usgn)
7015 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
7016 else
7017 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
7018 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
7019 }
7020 case NEON::BI__builtin_neon_vshr_n_v:
7021 case NEON::BI__builtin_neon_vshrq_n_v:
7022 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
7023 case NEON::BI__builtin_neon_vst1_v:
7024 case NEON::BI__builtin_neon_vst1q_v:
7025 case NEON::BI__builtin_neon_vst2_v:
7026 case NEON::BI__builtin_neon_vst2q_v:
7027 case NEON::BI__builtin_neon_vst3_v:
7028 case NEON::BI__builtin_neon_vst3q_v:
7029 case NEON::BI__builtin_neon_vst4_v:
7030 case NEON::BI__builtin_neon_vst4q_v:
7031 case NEON::BI__builtin_neon_vst2_lane_v:
7032 case NEON::BI__builtin_neon_vst2q_lane_v:
7033 case NEON::BI__builtin_neon_vst3_lane_v:
7034 case NEON::BI__builtin_neon_vst3q_lane_v:
7035 case NEON::BI__builtin_neon_vst4_lane_v:
7036 case NEON::BI__builtin_neon_vst4q_lane_v: {
7037 llvm::Type *Tys[] = {Int8PtrTy, Ty};
7038 Ops.push_back(getAlignmentValue32(PtrOp0));
7039 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
7040 }
7041 case NEON::BI__builtin_neon_vsm3partw1q_v:
7042 case NEON::BI__builtin_neon_vsm3partw2q_v:
7043 case NEON::BI__builtin_neon_vsm3ss1q_v:
7044 case NEON::BI__builtin_neon_vsm4ekeyq_v:
7045 case NEON::BI__builtin_neon_vsm4eq_v: {
7046 Function *F = CGM.getIntrinsic(Int);
7047 return EmitNeonCall(F, Ops, "");
7048 }
7049 case NEON::BI__builtin_neon_vsm3tt1aq_v:
7050 case NEON::BI__builtin_neon_vsm3tt1bq_v:
7051 case NEON::BI__builtin_neon_vsm3tt2aq_v:
7052 case NEON::BI__builtin_neon_vsm3tt2bq_v: {
7053 Function *F = CGM.getIntrinsic(Int);
7054 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7055 return EmitNeonCall(F, Ops, "");
7056 }
7057 case NEON::BI__builtin_neon_vst1_x2_v:
7058 case NEON::BI__builtin_neon_vst1q_x2_v:
7059 case NEON::BI__builtin_neon_vst1_x3_v:
7060 case NEON::BI__builtin_neon_vst1q_x3_v:
7061 case NEON::BI__builtin_neon_vst1_x4_v:
7062 case NEON::BI__builtin_neon_vst1q_x4_v: {
7063 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType());
7064 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
7065 // in AArch64 it comes last. We may want to stick to one or another.
7066 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
7067 Arch == llvm::Triple::aarch64_32) {
7068 llvm::Type *Tys[2] = { VTy, PTy };
7069 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7070 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
7071 }
7072 llvm::Type *Tys[2] = { PTy, VTy };
7073 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
7074 }
7075 case NEON::BI__builtin_neon_vsubhn_v: {
7076 llvm::FixedVectorType *SrcTy =
7077 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7078
7079 // %sum = add <4 x i32> %lhs, %rhs
7080 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7081 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
7082 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
7083
7084 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7085 Constant *ShiftAmt =
7086 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
7087 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
7088
7089 // %res = trunc <4 x i32> %high to <4 x i16>
7090 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
7091 }
7092 case NEON::BI__builtin_neon_vtrn_v:
7093 case NEON::BI__builtin_neon_vtrnq_v: {
7094 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7095 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7096 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7097 Value *SV = nullptr;
7098
7099 for (unsigned vi = 0; vi != 2; ++vi) {
7100 SmallVector<int, 16> Indices;
7101 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7102 Indices.push_back(i+vi);
7103 Indices.push_back(i+e+vi);
7104 }
7105 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7106 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7107 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7108 }
7109 return SV;
7110 }
7111 case NEON::BI__builtin_neon_vtst_v:
7112 case NEON::BI__builtin_neon_vtstq_v: {
7113 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7114 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7115 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
7116 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
7117 ConstantAggregateZero::get(Ty));
7118 return Builder.CreateSExt(Ops[0], Ty, "vtst");
7119 }
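// I.e. vtst is emitted as a compare of the AND of its operands against zero,
// sign-extended back to the lane type; for a <8 x i8> input this is roughly:
//   %and = and <8 x i8> %a, %b
//   %cmp = icmp ne <8 x i8> %and, zeroinitializer
//   %res = sext <8 x i1> %cmp to <8 x i8>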
7120 case NEON::BI__builtin_neon_vuzp_v:
7121 case NEON::BI__builtin_neon_vuzpq_v: {
7122 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7123 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7124 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7125 Value *SV = nullptr;
7126
7127 for (unsigned vi = 0; vi != 2; ++vi) {
7128 SmallVector<int, 16> Indices;
7129 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7130 Indices.push_back(2*i+vi);
7131
7132 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7133 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7134 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7135 }
7136 return SV;
7137 }
7138 case NEON::BI__builtin_neon_vxarq_v: {
7139 Function *F = CGM.getIntrinsic(Int);
7140 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7141 return EmitNeonCall(F, Ops, "");
7142 }
7143 case NEON::BI__builtin_neon_vzip_v:
7144 case NEON::BI__builtin_neon_vzipq_v: {
7145 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7146 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7147 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7148 Value *SV = nullptr;
7149
7150 for (unsigned vi = 0; vi != 2; ++vi) {
7151 SmallVector<int, 16> Indices;
7152 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7153 Indices.push_back((i + vi*e) >> 1);
7154 Indices.push_back(((i + vi*e) >> 1)+e);
7155 }
7156 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7157 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7158 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7159 }
7160 return SV;
7161 }
7162 case NEON::BI__builtin_neon_vdot_v:
7163 case NEON::BI__builtin_neon_vdotq_v: {
7164 auto *InputTy =
7165 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
7166 llvm::Type *Tys[2] = { Ty, InputTy };
7167 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
7168 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
7169 }
7170 case NEON::BI__builtin_neon_vfmlal_low_v:
7171 case NEON::BI__builtin_neon_vfmlalq_low_v: {
7172 auto *InputTy =
7173 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7174 llvm::Type *Tys[2] = { Ty, InputTy };
7175 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
7176 }
7177 case NEON::BI__builtin_neon_vfmlsl_low_v:
7178 case NEON::BI__builtin_neon_vfmlslq_low_v: {
7179 auto *InputTy =
7180 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7181 llvm::Type *Tys[2] = { Ty, InputTy };
7182 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
7183 }
7184 case NEON::BI__builtin_neon_vfmlal_high_v:
7185 case NEON::BI__builtin_neon_vfmlalq_high_v: {
7186 auto *InputTy =
7187 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7188 llvm::Type *Tys[2] = { Ty, InputTy };
7189 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
7190 }
7191 case NEON::BI__builtin_neon_vfmlsl_high_v:
7192 case NEON::BI__builtin_neon_vfmlslq_high_v: {
7193 auto *InputTy =
7194 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7195 llvm::Type *Tys[2] = { Ty, InputTy };
7196 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
7197 }
7198 case NEON::BI__builtin_neon_vmmlaq_v: {
7199 auto *InputTy =
7200 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
7201 llvm::Type *Tys[2] = { Ty, InputTy };
7202 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
7203 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmmla");
7204 }
7205 case NEON::BI__builtin_neon_vusmmlaq_v: {
7206 auto *InputTy =
7207 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
7208 llvm::Type *Tys[2] = { Ty, InputTy };
7209 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
7210 }
7211 case NEON::BI__builtin_neon_vusdot_v:
7212 case NEON::BI__builtin_neon_vusdotq_v: {
7213 auto *InputTy =
7214 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
7215 llvm::Type *Tys[2] = { Ty, InputTy };
7216 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
7217 }
7218 case NEON::BI__builtin_neon_vbfdot_v:
7219 case NEON::BI__builtin_neon_vbfdotq_v: {
7220 llvm::Type *InputTy =
7221 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
7222 llvm::Type *Tys[2] = { Ty, InputTy };
7223 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
7224 }
7225 case NEON::BI__builtin_neon___a32_vcvt_bf16_v: {
7226 llvm::Type *Tys[1] = { Ty };
7227 Function *F = CGM.getIntrinsic(Int, Tys);
7228 return EmitNeonCall(F, Ops, "vcvtfp2bf");
7229 }
7230
7231 }
7232
7233 assert(Int && "Expected valid intrinsic number");
7234
7235 // Determine the type(s) of this overloaded AArch64 intrinsic.
7236 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
7237
7238 Value *Result = EmitNeonCall(F, Ops, NameHint);
7239 llvm::Type *ResultType = ConvertType(E->getType());
7240 // Cast the one-element vector result of an AArch64 intrinsic to the
7241 // scalar type expected by the builtin.
7242 return Builder.CreateBitCast(Result, ResultType, NameHint);
7243}
7244
7245Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
7246 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
7247 const CmpInst::Predicate Ip, const Twine &Name) {
7248 llvm::Type *OTy = Op->getType();
7249
7250 // FIXME: this is utterly horrific. We should not be looking at previous
7251 // codegen context to find out what needs doing. Unfortunately TableGen
7252 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
7253 // (etc).
7254 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
7255 OTy = BI->getOperand(0)->getType();
7256
7257 Op = Builder.CreateBitCast(Op, OTy);
7258 if (OTy->getScalarType()->isFloatingPointTy()) {
7259 if (Fp == CmpInst::FCMP_OEQ)
7260 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
7261 else
7262 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
7263 } else {
7264 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
7265 }
7266 return Builder.CreateSExt(Op, Ty, Name);
7267}
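// For example, vceqz_f32 reaches this helper with Fp == FCMP_OEQ and becomes
// a quiet fcmp against zero plus a sign extension, while vceqz_s32 takes the
// integer branch with Ip == ICMP_EQ.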
7268
7269static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
7270 Value *ExtOp, Value *IndexOp,
7271 llvm::Type *ResTy, unsigned IntID,
7272 const char *Name) {
7273 SmallVector<Value *, 2> TblOps;
7274 if (ExtOp)
7275 TblOps.push_back(ExtOp);
7276
7277 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
7278 SmallVector<int, 16> Indices;
7279 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
7280 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
7281 Indices.push_back(2*i);
7282 Indices.push_back(2*i+1);
7283 }
7284
7285 int PairPos = 0, End = Ops.size() - 1;
7286 while (PairPos < End) {
7287 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
7288 Ops[PairPos+1], Indices,
7289 Name));
7290 PairPos += 2;
7291 }
7292
7293 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
7294 // of the last 128-bit lookup table with zero.
7295 if (PairPos == End) {
7296 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
7297 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
7298 ZeroTbl, Indices, Name));
7299 }
7300
7301 Function *TblF;
7302 TblOps.push_back(IndexOp);
7303 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
7304
7305 return CGF.EmitNeonCall(TblF, TblOps, Name);
7306}
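// In short: the 64-bit table registers are packed pairwise into 128-bit
// vectors with shufflevector (an odd trailing table is padded with zeroes),
// so the 128-bit AArch64 TBL/TBX intrinsics can consume them.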
7307
7308Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
7309 unsigned Value;
7310 switch (BuiltinID) {
7311 default:
7312 return nullptr;
7313 case clang::ARM::BI__builtin_arm_nop:
7314 Value = 0;
7315 break;
7316 case clang::ARM::BI__builtin_arm_yield:
7317 case clang::ARM::BI__yield:
7318 Value = 1;
7319 break;
7320 case clang::ARM::BI__builtin_arm_wfe:
7321 case clang::ARM::BI__wfe:
7322 Value = 2;
7323 break;
7324 case clang::ARM::BI__builtin_arm_wfi:
7325 case clang::ARM::BI__wfi:
7326 Value = 3;
7327 break;
7328 case clang::ARM::BI__builtin_arm_sev:
7329 case clang::ARM::BI__sev:
7330 Value = 4;
7331 break;
7332 case clang::ARM::BI__builtin_arm_sevl:
7333 case clang::ARM::BI__sevl:
7334 Value = 5;
7335 break;
7336 }
7337
7338 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
7339 llvm::ConstantInt::get(Int32Ty, Value));
7340}
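// Usage example: a source-level __wfi() maps to clang::ARM::BI__wfi above
// and is emitted as:
//   call void @llvm.arm.hint(i32 3)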
7341
7342enum SpecialRegisterAccessKind {
7343 NormalRead,
7344 VolatileRead,
7345 Write,
7346};
7347
7348// Generates the IR for the read/write special register builtin,
7349// ValueType is the type of the value that is to be written or read,
7350// RegisterType is the type of the register being written to or read from.
7351static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
7352 const CallExpr *E,
7353 llvm::Type *RegisterType,
7354 llvm::Type *ValueType,
7355 SpecialRegisterAccessKind AccessKind,
7356 StringRef SysReg = "") {
7357 // Read and write register intrinsics only support 32- and 64-bit operations.
7358 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
7359 && "Unsupported size for register.");
7360
7361 CodeGen::CGBuilderTy &Builder = CGF.Builder;
7362 CodeGen::CodeGenModule &CGM = CGF.CGM;
7363 LLVMContext &Context = CGM.getLLVMContext();
7364
7365 if (SysReg.empty()) {
7366 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
7367 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
7368 }
7369
7370 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
7371 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
7372 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
7373
7374 llvm::Type *Types[] = { RegisterType };
7375
7376 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
7377 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
7378 && "Can't fit 64-bit value in 32-bit register");
7379
7380 if (AccessKind != Write) {
7381 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
7382 llvm::Function *F = CGM.getIntrinsic(
7383 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
7384 : llvm::Intrinsic::read_register,
7385 Types);
7386 llvm::Value *Call = Builder.CreateCall(F, Metadata);
7387
7388 if (MixedTypes)
7389 // Read into a 64-bit register, then truncate the result to 32 bits.
7390 return Builder.CreateTrunc(Call, ValueType);
7391
7392 if (ValueType->isPointerTy())
7393 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
7394 return Builder.CreateIntToPtr(Call, ValueType);
7395
7396 return Call;
7397 }
7398
7399 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
7400 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
7401 if (MixedTypes) {
7402 // Extend the 32-bit write value to 64 bits to pass to the write intrinsic.
7403 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
7404 return Builder.CreateCall(F, { Metadata, ArgValue });
7405 }
7406
7407 if (ValueType->isPointerTy()) {
7408 // Have VoidPtrTy ArgValue but want to return an i32/i64.
7409 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
7410 return Builder.CreateCall(F, { Metadata, ArgValue });
7411 }
7412
7413 return Builder.CreateCall(F, { Metadata, ArgValue });
7414}
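// Illustrative example (hypothetical register name): __builtin_arm_rsr64("X")
// selects Int64Ty and a volatile read, and is emitted roughly as
//   %v = call i64 @llvm.read_volatile_register.i64(metadata !0)
// where !0 holds the MDString "X"; the wsr builtins go through
// llvm.write_register in the same way.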
7415
7416/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
7417/// argument that specifies the vector type.
7418static bool HasExtraNeonArgument(unsigned BuiltinID) {
7419 switch (BuiltinID) {
7420 default: break;
7421 case NEON::BI__builtin_neon_vget_lane_i8:
7422 case NEON::BI__builtin_neon_vget_lane_i16:
7423 case NEON::BI__builtin_neon_vget_lane_bf16:
7424 case NEON::BI__builtin_neon_vget_lane_i32:
7425 case NEON::BI__builtin_neon_vget_lane_i64:
7426 case NEON::BI__builtin_neon_vget_lane_f32:
7427 case NEON::BI__builtin_neon_vgetq_lane_i8:
7428 case NEON::BI__builtin_neon_vgetq_lane_i16:
7429 case NEON::BI__builtin_neon_vgetq_lane_bf16:
7430 case NEON::BI__builtin_neon_vgetq_lane_i32:
7431 case NEON::BI__builtin_neon_vgetq_lane_i64:
7432 case NEON::BI__builtin_neon_vgetq_lane_f32:
7433 case NEON::BI__builtin_neon_vduph_lane_bf16:
7434 case NEON::BI__builtin_neon_vduph_laneq_bf16:
7435 case NEON::BI__builtin_neon_vset_lane_i8:
7436 case NEON::BI__builtin_neon_vset_lane_i16:
7437 case NEON::BI__builtin_neon_vset_lane_bf16:
7438 case NEON::BI__builtin_neon_vset_lane_i32:
7439 case NEON::BI__builtin_neon_vset_lane_i64:
7440 case NEON::BI__builtin_neon_vset_lane_f32:
7441 case NEON::BI__builtin_neon_vsetq_lane_i8:
7442 case NEON::BI__builtin_neon_vsetq_lane_i16:
7443 case NEON::BI__builtin_neon_vsetq_lane_bf16:
7444 case NEON::BI__builtin_neon_vsetq_lane_i32:
7445 case NEON::BI__builtin_neon_vsetq_lane_i64:
7446 case NEON::BI__builtin_neon_vsetq_lane_f32:
7447 case NEON::BI__builtin_neon_vsha1h_u32:
7448 case NEON::BI__builtin_neon_vsha1cq_u32:
7449 case NEON::BI__builtin_neon_vsha1pq_u32:
7450 case NEON::BI__builtin_neon_vsha1mq_u32:
7451 case NEON::BI__builtin_neon_vcvth_bf16_f32:
7452 case clang::ARM::BI_MoveToCoprocessor:
7453 case clang::ARM::BI_MoveToCoprocessor2:
7454 return false;
7455 }
7456 return true;
7457}
7458
7459Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
7460 const CallExpr *E,
7461 ReturnValueSlot ReturnValue,
7462 llvm::Triple::ArchType Arch) {
7463 if (auto Hint = GetValueForARMHint(BuiltinID))
7464 return Hint;
7465
7466 if (BuiltinID == clang::ARM::BI__emit) {
7467 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
7468 llvm::FunctionType *FTy =
7469 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
7470
7471 Expr::EvalResult Result;
7472 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
7473 llvm_unreachable("Sema will ensure that the parameter is constant");
7474
7475 llvm::APSInt Value = Result.Val.getInt();
7476 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
7477
7478 llvm::InlineAsm *Emit =
7479 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
7480 /*hasSideEffects=*/true)
7481 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
7482 /*hasSideEffects=*/true);
7483
7484 return Builder.CreateCall(Emit);
7485 }
7486
7487 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
7488 Value *Option = EmitScalarExpr(E->getArg(0));
7489 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
7490 }
7491
7492 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
7493 Value *Address = EmitScalarExpr(E->getArg(0));
7494 Value *RW = EmitScalarExpr(E->getArg(1));
7495 Value *IsData = EmitScalarExpr(E->getArg(2));
7496
7497 // Locality is not supported on the ARM target.
7498 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
7499
7500 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
7501 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
7502 }
7503
7504 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
7505 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
7506 return Builder.CreateCall(
7507 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
7508 }
7509
7510 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
7511 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
7512 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
7513 }
7514 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
7515 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
7516 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
7517 "cls");
7518 }
7519
7520 if (BuiltinID == clang::ARM::BI__clear_cache) {
7521 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
7522 const FunctionDecl *FD = E->getDirectCallee();
7523 Value *Ops[2];
7524 for (unsigned i = 0; i < 2; i++)
7525 Ops[i] = EmitScalarExpr(E->getArg(i));
7526 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
7527 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
7528 StringRef Name = FD->getName();
7529 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
7530 }
7531
7532 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
7533 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
7534 Function *F;
7535
7536 switch (BuiltinID) {
7537 default: llvm_unreachable("unexpected builtin");
7538 case clang::ARM::BI__builtin_arm_mcrr:
7539 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
7540 break;
7541 case clang::ARM::BI__builtin_arm_mcrr2:
7542 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
7543 break;
7544 }
7545
7546 // The MCRR{2} instruction has 5 operands but
7547 // the intrinsic has only 4, because Rt and Rt2
7548 // are represented as a single unsigned 64-bit
7549 // integer in the intrinsic definition, while
7550 // internally they are represented as two
7551 // 32-bit integers.
7552
7553 Value *Coproc = EmitScalarExpr(E->getArg(0));
7554 Value *Opc1 = EmitScalarExpr(E->getArg(1));
7555 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
7556 Value *CRm = EmitScalarExpr(E->getArg(3));
7557
7558 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
7559 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
7560 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
7561 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
7562
7563 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
7564 }
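  // E.g. for __builtin_arm_mcrr(coproc, opc1, x, crm) with 64-bit x, the
  // split above yields Rt = trunc(x) and Rt2 = trunc(x lshr 32).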
7565
7566 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
7567 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
7568 Function *F;
7569
7570 switch (BuiltinID) {
7571 default: llvm_unreachable("unexpected builtin");
7572 case clang::ARM::BI__builtin_arm_mrrc:
7573 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
7574 break;
7575 case clang::ARM::BI__builtin_arm_mrrc2:
7576 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
7577 break;
7578 }
7579
7580 Value *Coproc = EmitScalarExpr(E->getArg(0));
7581 Value *Opc1 = EmitScalarExpr(E->getArg(1));
7582 Value *CRm = EmitScalarExpr(E->getArg(2));
7583 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
7584
7585 // Returns an unsigned 64-bit integer, represented
7586 // as two 32-bit integers.
7587
7588 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
7589 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
7590 Rt = Builder.CreateZExt(Rt, Int64Ty);
7591 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
7592
7593 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
7594 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
7595 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
7596
7597 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
7598 }
7599
7600 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
7601 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
7602 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
7603 getContext().getTypeSize(E->getType()) == 64) ||
7604 BuiltinID == clang::ARM::BI__ldrexd) {
7605 Function *F;
7606
7607 switch (BuiltinID) {
7608 default: llvm_unreachable("unexpected builtin");
7609 case clang::ARM::BI__builtin_arm_ldaex:
7610 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
7611 break;
7612 case clang::ARM::BI__builtin_arm_ldrexd:
7613 case clang::ARM::BI__builtin_arm_ldrex:
7614 case clang::ARM::BI__ldrexd:
7615 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
7616 break;
7617 }
7618
7619 Value *LdPtr = EmitScalarExpr(E->getArg(0));
7620 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
7621 "ldrexd");
7622
7623 Value *Val0 = Builder.CreateExtractValue(Val, 1);
7624 Value *Val1 = Builder.CreateExtractValue(Val, 0);
7625 Val0 = Builder.CreateZExt(Val0, Int64Ty);
7626 Val1 = Builder.CreateZExt(Val1, Int64Ty);
7627
7628 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
7629 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
7630 Val = Builder.CreateOr(Val, Val1);
7631 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
7632 }
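  // The recombination above computes (zext(hi) << 32) | zext(lo), where
  // element 1 of the {i32, i32} ldrexd result supplies the high word.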
7633
7634 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
7635 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
7636 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
7637
7638 QualType Ty = E->getType();
7639 llvm::Type *RealResTy = ConvertType(Ty);
7640 llvm::Type *IntTy =
7641 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
7642 llvm::Type *PtrTy = IntTy->getPointerTo();
7643 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
7644
7645 Function *F = CGM.getIntrinsic(
7646 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
7647 : Intrinsic::arm_ldrex,
7648 PtrTy);
7649 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
7650 Val->addParamAttr(
7651 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
7652
7653 if (RealResTy->isPointerTy())
7654 return Builder.CreateIntToPtr(Val, RealResTy);
7655 else {
7656 llvm::Type *IntResTy = llvm::IntegerType::get(
7657 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
7658 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
7659 RealResTy);
7660 }
7661 }
7662
7663 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
7664 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
7665 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
7666 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
7667 Function *F = CGM.getIntrinsic(
7668 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
7669 : Intrinsic::arm_strexd);
7670 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
7671
7672 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7673 Value *Val = EmitScalarExpr(E->getArg(0));
7674 Builder.CreateStore(Val, Tmp);
7675
7676 Address LdPtr = Builder.CreateElementBitCast(Tmp, STy);
7677 Val = Builder.CreateLoad(LdPtr);
7678
7679 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
7680 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
7681 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
7682 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
7683 }
7684
7685 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
7686 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
7687 Value *StoreVal = EmitScalarExpr(E->getArg(0));
7688 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
7689
7690 QualType Ty = E->getArg(0)->getType();
7691 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
7692 getContext().getTypeSize(Ty));
7693 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
7694
7695 if (StoreVal->getType()->isPointerTy())
7696 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
7697 else {
7698 llvm::Type *IntTy = llvm::IntegerType::get(
7699 getLLVMContext(),
7700 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
7701 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
7702 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
7703 }
7704
7705 Function *F = CGM.getIntrinsic(
7706 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
7707 : Intrinsic::arm_strex,
7708 StoreAddr->getType());
7709
7710 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
7711 CI->addParamAttr(
7712 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
7713 return CI;
7714 }
7715
7716 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
7717 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
7718 return Builder.CreateCall(F);
7719 }
7720
7721 // CRC32
7722 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
7723 switch (BuiltinID) {
7724 case clang::ARM::BI__builtin_arm_crc32b:
7725 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
7726 case clang::ARM::BI__builtin_arm_crc32cb:
7727 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
7728 case clang::ARM::BI__builtin_arm_crc32h:
7729 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
7730 case clang::ARM::BI__builtin_arm_crc32ch:
7731 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
7732 case clang::ARM::BI__builtin_arm_crc32w:
7733 case clang::ARM::BI__builtin_arm_crc32d:
7734 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
7735 case clang::ARM::BI__builtin_arm_crc32cw:
7736 case clang::ARM::BI__builtin_arm_crc32cd:
7737 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
7738 }
7739
7740 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
7741 Value *Arg0 = EmitScalarExpr(E->getArg(0));
7742 Value *Arg1 = EmitScalarExpr(E->getArg(1));
7743
7744 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
7745 // intrinsics, hence we need different codegen for these cases.
7746 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
7747 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
7748 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
7749 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
7750 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
7751 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
7752
7753 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
7754 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
7755 return Builder.CreateCall(F, {Res, Arg1b});
7756 } else {
7757 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
7758
7759 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
7760 return Builder.CreateCall(F, {Arg0, Arg1});
7761 }
7762 }
7763
7764 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
7765 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
7766 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
7767 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
7768 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
7769 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
7770
7771 SpecialRegisterAccessKind AccessKind = Write;
7772 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
7773 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
7774 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
7775 AccessKind = VolatileRead;
7776
7777 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
7778 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
7779
7780 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
7781 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
7782
7783 llvm::Type *ValueType;
7784 llvm::Type *RegisterType;
7785 if (IsPointerBuiltin) {
7786 ValueType = VoidPtrTy;
7787 RegisterType = Int32Ty;
7788 } else if (Is64Bit) {
7789 ValueType = RegisterType = Int64Ty;
7790 } else {
7791 ValueType = RegisterType = Int32Ty;
7792 }
7793
7794 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
7795 AccessKind);
7796 }
7797
7798 if (BuiltinID == ARM::BI__builtin_sponentry) {
7799 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
7800 return Builder.CreateCall(F);
7801 }
7802
7803 // Handle MSVC intrinsics before argument evaluation to prevent double
7804 // evaluation.
7805 if (Optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
7806 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
7807
7808 // Deal with MVE builtins
7809 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
7810 return Result;
7811 // Handle CDE builtins
7812 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
7813 return Result;
7814
7815 // Find out if any arguments are required to be integer constant
7816 // expressions.
7817 unsigned ICEArguments = 0;
7818 ASTContext::GetBuiltinTypeError Error;
7819 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7820 assert(Error == ASTContext::GE_None && "Should not codegen an error");
7821
7822 auto getAlignmentValue32 = [&](Address addr) -> Value* {
7823 return Builder.getInt32(addr.getAlignment().getQuantity());
7824 };
7825
7826 Address PtrOp0 = Address::invalid();
7827 Address PtrOp1 = Address::invalid();
7828 SmallVector<Value*, 4> Ops;
7829 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
7830 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
7831 for (unsigned i = 0, e = NumArgs; i != e; i++) {
7832 if (i == 0) {
7833 switch (BuiltinID) {
7834 case NEON::BI__builtin_neon_vld1_v:
7835 case NEON::BI__builtin_neon_vld1q_v:
7836 case NEON::BI__builtin_neon_vld1q_lane_v:
7837 case NEON::BI__builtin_neon_vld1_lane_v:
7838 case NEON::BI__builtin_neon_vld1_dup_v:
7839 case NEON::BI__builtin_neon_vld1q_dup_v:
7840 case NEON::BI__builtin_neon_vst1_v:
7841 case NEON::BI__builtin_neon_vst1q_v:
7842 case NEON::BI__builtin_neon_vst1q_lane_v:
7843 case NEON::BI__builtin_neon_vst1_lane_v:
7844 case NEON::BI__builtin_neon_vst2_v:
7845 case NEON::BI__builtin_neon_vst2q_v:
7846 case NEON::BI__builtin_neon_vst2_lane_v:
7847 case NEON::BI__builtin_neon_vst2q_lane_v:
7848 case NEON::BI__builtin_neon_vst3_v:
7849 case NEON::BI__builtin_neon_vst3q_v:
7850 case NEON::BI__builtin_neon_vst3_lane_v:
7851 case NEON::BI__builtin_neon_vst3q_lane_v:
7852 case NEON::BI__builtin_neon_vst4_v:
7853 case NEON::BI__builtin_neon_vst4q_v:
7854 case NEON::BI__builtin_neon_vst4_lane_v:
7855 case NEON::BI__builtin_neon_vst4q_lane_v:
7856 // Get the alignment for the argument in addition to the value;
7857 // we'll use it later.
7858 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
7859 Ops.push_back(PtrOp0.getPointer());
7860 continue;
7861 }
7862 }
7863 if (i == 1) {
7864 switch (BuiltinID) {
7865 case NEON::BI__builtin_neon_vld2_v:
7866 case NEON::BI__builtin_neon_vld2q_v:
7867 case NEON::BI__builtin_neon_vld3_v:
7868 case NEON::BI__builtin_neon_vld3q_v:
7869 case NEON::BI__builtin_neon_vld4_v:
7870 case NEON::BI__builtin_neon_vld4q_v:
7871 case NEON::BI__builtin_neon_vld2_lane_v:
7872 case NEON::BI__builtin_neon_vld2q_lane_v:
7873 case NEON::BI__builtin_neon_vld3_lane_v:
7874 case NEON::BI__builtin_neon_vld3q_lane_v:
7875 case NEON::BI__builtin_neon_vld4_lane_v:
7876 case NEON::BI__builtin_neon_vld4q_lane_v:
7877 case NEON::BI__builtin_neon_vld2_dup_v:
7878 case NEON::BI__builtin_neon_vld2q_dup_v:
7879 case NEON::BI__builtin_neon_vld3_dup_v:
7880 case NEON::BI__builtin_neon_vld3q_dup_v:
7881 case NEON::BI__builtin_neon_vld4_dup_v:
7882 case NEON::BI__builtin_neon_vld4q_dup_v:
7883 // Get the alignment for the argument in addition to the value;
7884 // we'll use it later.
7885 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
7886 Ops.push_back(PtrOp1.getPointer());
7887 continue;
7888 }
7889 }
7890
7891 if ((ICEArguments & (1 << i)) == 0) {
7892 Ops.push_back(EmitScalarExpr(E->getArg(i)));
7893 } else {
7894 // If this is required to be a constant, constant fold it so that we know
7895 // that the generated intrinsic gets a ConstantInt.
7896 Ops.push_back(llvm::ConstantInt::get(
7897 getLLVMContext(),
7898 *E->getArg(i)->getIntegerConstantExpr(getContext())));
7899 }
7900 }
7901
7902 switch (BuiltinID) {
7903 default: break;
7904
7905 case NEON::BI__builtin_neon_vget_lane_i8:
7906 case NEON::BI__builtin_neon_vget_lane_i16:
7907 case NEON::BI__builtin_neon_vget_lane_i32:
7908 case NEON::BI__builtin_neon_vget_lane_i64:
7909 case NEON::BI__builtin_neon_vget_lane_bf16:
7910 case NEON::BI__builtin_neon_vget_lane_f32:
7911 case NEON::BI__builtin_neon_vgetq_lane_i8:
7912 case NEON::BI__builtin_neon_vgetq_lane_i16:
7913 case NEON::BI__builtin_neon_vgetq_lane_i32:
7914 case NEON::BI__builtin_neon_vgetq_lane_i64:
7915 case NEON::BI__builtin_neon_vgetq_lane_bf16:
7916 case NEON::BI__builtin_neon_vgetq_lane_f32:
7917 case NEON::BI__builtin_neon_vduph_lane_bf16:
7918 case NEON::BI__builtin_neon_vduph_laneq_bf16:
7919 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
7920
7921 case NEON::BI__builtin_neon_vrndns_f32: {
7922 Value *Arg = EmitScalarExpr(E->getArg(0));
7923 llvm::Type *Tys[] = {Arg->getType()};
7924 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
7925 return Builder.CreateCall(F, {Arg}, "vrndn"); }
7926
7927 case NEON::BI__builtin_neon_vset_lane_i8:
7928 case NEON::BI__builtin_neon_vset_lane_i16:
7929 case NEON::BI__builtin_neon_vset_lane_i32:
7930 case NEON::BI__builtin_neon_vset_lane_i64:
7931 case NEON::BI__builtin_neon_vset_lane_bf16:
7932 case NEON::BI__builtin_neon_vset_lane_f32:
7933 case NEON::BI__builtin_neon_vsetq_lane_i8:
7934 case NEON::BI__builtin_neon_vsetq_lane_i16:
7935 case NEON::BI__builtin_neon_vsetq_lane_i32:
7936 case NEON::BI__builtin_neon_vsetq_lane_i64:
7937 case NEON::BI__builtin_neon_vsetq_lane_bf16:
7938 case NEON::BI__builtin_neon_vsetq_lane_f32:
7939 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
7940
7941 case NEON::BI__builtin_neon_vsha1h_u32:
7942 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
7943 "vsha1h");
7944 case NEON::BI__builtin_neon_vsha1cq_u32:
7945 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
7946 "vsha1h");
7947 case NEON::BI__builtin_neon_vsha1pq_u32:
7948 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
7949 "vsha1h");
7950 case NEON::BI__builtin_neon_vsha1mq_u32:
7951 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
7952 "vsha1h");
7953
7954 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
7955 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
7956 "vcvtbfp2bf");
7957 }
7958
7959 // The ARM _MoveToCoprocessor builtins put the input register value as
7960 // the first argument, but the LLVM intrinsic expects it as the third one.
7961 case clang::ARM::BI_MoveToCoprocessor:
7962 case clang::ARM::BI_MoveToCoprocessor2: {
7963 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
7964 ? Intrinsic::arm_mcr
7965 : Intrinsic::arm_mcr2);
7966 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
7967 Ops[3], Ops[4], Ops[5]});
7968 }
7969 }
7970
7971 // Get the last argument, which specifies the vector type.
7972 assert(HasExtraArg);
7973 const Expr *Arg = E->getArg(E->getNumArgs()-1);
7974 Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(getContext());
7975 if (!Result)
7976 return nullptr;
7977
7978 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
7979 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
7980 // Determine the overloaded type of this builtin.
7981 llvm::Type *Ty;
7982 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
7983 Ty = FloatTy;
7984 else
7985 Ty = DoubleTy;
7986
7987 // Determine whether this is an unsigned conversion or not.
7988 bool usgn = Result->getZExtValue() == 1;
7989 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
7990
7991 // Call the appropriate intrinsic.
7992 Function *F = CGM.getIntrinsic(Int, Ty);
7993 return Builder.CreateCall(F, Ops, "vcvtr");
7994 }
7995
7996 // Determine the type of this overloaded NEON intrinsic.
7997 NeonTypeFlags Type = Result->getZExtValue();
7998 bool usgn = Type.isUnsigned();
7999 bool rightShift = false;
8000
8001 llvm::FixedVectorType *VTy =
8002 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
8003 getTarget().hasBFloat16Type());
8004 llvm::Type *Ty = VTy;
8005 if (!Ty)
8006 return nullptr;
8007
8008 // Many NEON builtins have identical semantics and uses in ARM and
8009 // AArch64. Emit these in a single function.
8010 auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
8011 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
8012 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
8013 if (Builtin)
8014 return EmitCommonNeonBuiltinExpr(
8015 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
8016 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
8017
8018 unsigned Int;
8019 switch (BuiltinID) {
8020 default: return nullptr;
8021 case NEON::BI__builtin_neon_vld1q_lane_v:
8022 // Handle 64-bit integer elements as a special case. Use shuffles of
8023 // one-element vectors to avoid poor code for i64 in the backend.
8024 if (VTy->getElementType()->isIntegerTy(64)) {
8025 // Extract the other lane.
8026 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8027 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
8028 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
8029 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
8030 // Load the value as a one-element vector.
8031 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
8032 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8033 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
8034 Value *Align = getAlignmentValue32(PtrOp0);
8035 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
8036 // Combine them.
8037 int Indices[] = {1 - Lane, Lane};
8038 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
8039 }
8040 LLVM_FALLTHROUGH;
8041 case NEON::BI__builtin_neon_vld1_lane_v: {
8042 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8043 PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
8044 Value *Ld = Builder.CreateLoad(PtrOp0);
8045 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
8046 }
8047 case NEON::BI__builtin_neon_vqrshrn_n_v:
8048 Int =
8049 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
8050 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
8051 1, true);
8052 case NEON::BI__builtin_neon_vqrshrun_n_v:
8053 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
8054 Ops, "vqrshrun_n", 1, true);
8055 case NEON::BI__builtin_neon_vqshrn_n_v:
8056 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
8057 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
8058 1, true);
8059 case NEON::BI__builtin_neon_vqshrun_n_v:
8060 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
8061 Ops, "vqshrun_n", 1, true);
8062 case NEON::BI__builtin_neon_vrecpe_v:
8063 case NEON::BI__builtin_neon_vrecpeq_v:
8064 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
8065 Ops, "vrecpe");
8066 case NEON::BI__builtin_neon_vrshrn_n_v:
8067 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
8068 Ops, "vrshrn_n", 1, true);
8069 case NEON::BI__builtin_neon_vrsra_n_v:
8070 case NEON::BI__builtin_neon_vrsraq_n_v:
8071 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8072 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8073 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
8074 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
8075 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
8076 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
8077 case NEON::BI__builtin_neon_vsri_n_v:
8078 case NEON::BI__builtin_neon_vsriq_n_v:
8079 rightShift = true;
8080 LLVM_FALLTHROUGH;
8081 case NEON::BI__builtin_neon_vsli_n_v:
8082 case NEON::BI__builtin_neon_vsliq_n_v:
8083 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
8084 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
8085 Ops, "vsli_n");
8086 case NEON::BI__builtin_neon_vsra_n_v:
8087 case NEON::BI__builtin_neon_vsraq_n_v:
8088 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8089 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
8090 return Builder.CreateAdd(Ops[0], Ops[1]);
8091 case NEON::BI__builtin_neon_vst1q_lane_v:
8092 // Handle 64-bit integer elements as a special case. Use a shuffle to get
8093 // a one-element vector and avoid poor code for i64 in the backend.
8094 if (VTy->getElementType()->isIntegerTy(64)) {
8095 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8096 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
8097 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
8098 Ops[2] = getAlignmentValue32(PtrOp0);
8099 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
8100 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
8101 Tys), Ops);
8102 }
8103 LLVM_FALLTHROUGH;
8104 case NEON::BI__builtin_neon_vst1_lane_v: {
8105 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8106 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
8107 auto St = Builder.CreateStore(
8108 Ops[1], Builder.CreateElementBitCast(PtrOp0, Ops[1]->getType()));
8109 return St;
8110 }
8111 case NEON::BI__builtin_neon_vtbl1_v:
8112 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
8113 Ops, "vtbl1");
8114 case NEON::BI__builtin_neon_vtbl2_v:
8115 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
8116 Ops, "vtbl2");
8117 case NEON::BI__builtin_neon_vtbl3_v:
8118 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
8119 Ops, "vtbl3");
8120 case NEON::BI__builtin_neon_vtbl4_v:
8121 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
8122 Ops, "vtbl4");
8123 case NEON::BI__builtin_neon_vtbx1_v:
8124 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
8125 Ops, "vtbx1");
8126 case NEON::BI__builtin_neon_vtbx2_v:
8127 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
8128 Ops, "vtbx2");
8129 case NEON::BI__builtin_neon_vtbx3_v:
8130 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
8131 Ops, "vtbx3");
8132 case NEON::BI__builtin_neon_vtbx4_v:
8133 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
8134 Ops, "vtbx4");
8135 }
8136}
8137
8138template<typename Integer>
8139static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
8140 return E->getIntegerConstantExpr(Context)->getExtValue();
8141}
8142
8143static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
8144 llvm::Type *T, bool Unsigned) {
8145 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
8146 // which finds it convenient to specify signed/unsigned as a boolean flag.
8147 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
8148}
8149
8150static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
8151 uint32_t Shift, bool Unsigned) {
8152 // MVE helper function for integer shift right. This must handle signed vs
8153 // unsigned, and also deal specially with the case where the shift count is
8154 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
8155 // undefined behavior, but in MVE it's legal, so we must convert it to code
8156 // that is not undefined in IR.
8157 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
8158 ->getElementType()
8159 ->getPrimitiveSizeInBits();
8160 if (Shift == LaneBits) {
8161 // An unsigned shift of the full lane size always generates zero, so we can
8162 // simply emit a zero vector. A signed shift of the full lane size does the
8163 // same thing as shifting by one bit fewer.
8164 if (Unsigned)
8165 return llvm::Constant::getNullValue(V->getType());
8166 else
8167 --Shift;
8168 }
8169 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
8170}
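// Usage example: an MVE shift right by the full 32-bit lane width returns a
// zero vector here when unsigned, and an ashr by 31 when signed, either way
// avoiding a shift amount that plain IR would treat as undefined.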
8171
8172static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
8173 // MVE-specific helper function for a vector splat, which infers the element
8174 // count of the output vector by knowing that MVE vectors are all 128 bits
8175 // wide.
8176 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
8177 return Builder.CreateVectorSplat(Elements, V);
8178}
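// E.g. splatting an i16 scalar yields a <8 x i16> vector, since 128/16 == 8.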
8179
8180static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
8181 CodeGenFunction *CGF,
8182 llvm::Value *V,
8183 llvm::Type *DestType) {
8184 // Convert one MVE vector type into another by reinterpreting its in-register
8185 // format.
8186 //
8187 // Little-endian, this is identical to a bitcast (which reinterprets the
8188 // memory format). But big-endian, they're not necessarily the same, because
8189 // the register and memory formats map to each other differently depending on
8190 // the lane size.
8191 //
8192 // We generate a bitcast whenever we can (if we're little-endian, or if the
8193 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
8194 // that performs the different kind of reinterpretation.
8195 if (CGF->getTarget().isBigEndian() &&
8196 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
8197 return Builder.CreateCall(
8198 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
8199 {DestType, V->getType()}),
8200 V);
8201 } else {
8202 return Builder.CreateBitCast(V, DestType);
8203 }
8204}
8205
8206static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
8207 // Make a shufflevector that extracts every other element of a vector (evens
8208 // or odds, as desired).
8209 SmallVector<int, 16> Indices;
8210 unsigned InputElements =
8211 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
8212 for (unsigned i = 0; i < InputElements; i += 2)
8213 Indices.push_back(i + Odd);
8214 return Builder.CreateShuffleVector(V, Indices);
8215}
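// E.g. for a <8 x i16> input, Odd == false extracts lanes {0,2,4,6} and
// Odd == true extracts lanes {1,3,5,7}.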
8216
8217static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
8218 llvm::Value *V1) {
8219 // Make a shufflevector that interleaves two vectors element by element.
8220 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
8221 SmallVector<int, 16> Indices;
8222 unsigned InputElements =
8223 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
8224 for (unsigned i = 0; i < InputElements; i++) {
8225 Indices.push_back(i);
8226 Indices.push_back(i + InputElements);
8227 }
8228 return Builder.CreateShuffleVector(V0, V1, Indices);
8229}
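// E.g. zipping two <4 x i32> vectors uses the shuffle mask {0,4,1,5,2,6,3,7}.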
8230
8231template<unsigned HighBit, unsigned OtherBits>
8232static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
8233 // MVE-specific helper function to make a vector splat of a constant such as
8234 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
8235 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
8236 unsigned LaneBits = T->getPrimitiveSizeInBits();
8237 uint32_t Value = HighBit << (LaneBits - 1);
8238 if (OtherBits)
8239 Value |= (1UL << (LaneBits - 1)) - 1;
8240 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
8241 return ARMMVEVectorSplat(Builder, Lane);
8242}
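// E.g. ARMMVEConstantSplat<1, 0> splats the INT_MIN bit pattern for the lane
// type and ARMMVEConstantSplat<1, 1> splats UINT_MAX (all bits set).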
8243
8244static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
8245 llvm::Value *V,
8246 unsigned ReverseWidth) {
8247 // MVE-specific helper function which reverses the elements of a
8248 // vector within every (ReverseWidth)-bit collection of lanes.
8249 SmallVector<int, 16> Indices;
8250 unsigned LaneSize = V->getType()->getScalarSizeInBits();
8251 unsigned Elements = 128 / LaneSize;
8252 unsigned Mask = ReverseWidth / LaneSize - 1;
8253 for (unsigned i = 0; i < Elements; i++)
8254 Indices.push_back(i ^ Mask);
8255 return Builder.CreateShuffleVector(V, Indices);
8256}
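// E.g. reversing i8 lanes within every 32-bit group gives Mask == 3 and the
// index pattern {3,2,1,0, 7,6,5,4, ...}.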
8257
8258Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
8259 const CallExpr *E,
8260 ReturnValueSlot ReturnValue,
8261 llvm::Triple::ArchType Arch) {
8262 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
8263 Intrinsic::ID IRIntr;
8264 unsigned NumVectors;
8265
8266 // Code autogenerated by Tablegen will handle all the simple builtins.
8267 switch (BuiltinID) {
8268 #include "clang/Basic/arm_mve_builtin_cg.inc"
8269
8270 // If we didn't match an MVE builtin id at all, go back to the
8271 // main EmitARMBuiltinExpr.
8272 default:
8273 return nullptr;
8274 }
8275
8276 // Anything that breaks from that switch is an MVE builtin that
8277 // needs handwritten code to generate.
8278
8279 switch (CustomCodeGenType) {
8280
8281 case CustomCodeGen::VLD24: {
8282 llvm::SmallVector<Value *, 4> Ops;
8283 llvm::SmallVector<llvm::Type *, 4> Tys;
8284
8285 auto MvecCType = E->getType();
8286 auto MvecLType = ConvertType(MvecCType);
8287 assert(MvecLType->isStructTy() &&
8288 "Return type for vld[24]q should be a struct");
8289 assert(MvecLType->getStructNumElements() == 1 &&
8290 "Return-type struct for vld[24]q should have one element");
8291 auto MvecLTypeInner = MvecLType->getStructElementType(0);
8292 assert(MvecLTypeInner->isArrayTy() &&
8293 "Return-type struct for vld[24]q should contain an array");
8294 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
8295 "Array member of return-type struct vld[24]q has wrong length");
8296 auto VecLType = MvecLTypeInner->getArrayElementType();
8297
8298 Tys.push_back(VecLType);
8299
8300 auto Addr = E->getArg(0);
8301 Ops.push_back(EmitScalarExpr(Addr));
8302 Tys.push_back(ConvertType(Addr->getType()));
8303
8304 Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
8305 Value *LoadResult = Builder.CreateCall(F, Ops);
8306 Value *MvecOut = UndefValue::get(MvecLType);
8307 for (unsigned i = 0; i < NumVectors; ++i) {
8308 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
8309 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
8310 }
8311
8312 if (ReturnValue.isNull())
8313 return MvecOut;
8314 else
8315 return Builder.CreateStore(MvecOut, ReturnValue.getValue());
8316 }
8317
8318 case CustomCodeGen::VST24: {
8319 llvm::SmallVector<Value *, 4> Ops;
8320 llvm::SmallVector<llvm::Type *, 4> Tys;
8321
8322 auto Addr = E->getArg(0);
8323 Ops.push_back(EmitScalarExpr(Addr));
8324 Tys.push_back(ConvertType(Addr->getType()));
8325
8326 auto MvecCType = E->getArg(1)->getType();
8327 auto MvecLType = ConvertType(MvecCType);
8328 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
8329 assert(MvecLType->getStructNumElements() == 1 &&
8330 "Data-type struct for vst2q should have one element");
8331 auto MvecLTypeInner = MvecLType->getStructElementType(0);
8332 assert(MvecLTypeInner->isArrayTy() &&
8333 "Data-type struct for vst2q should contain an array");
8334 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
8335 "Array member of return-type struct vld[24]q has wrong length");
8336 auto VecLType = MvecLTypeInner->getArrayElementType();
8337
8338 Tys.push_back(VecLType);
8339
8340 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
8341 EmitAggExpr(E->getArg(1), MvecSlot);
8342 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
8343 for (unsigned i = 0; i < NumVectors; i++)
8344 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
8345
8346 Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
8347 Value *ToReturn = nullptr;
8348 for (unsigned i = 0; i < NumVectors; i++) {
8349 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
8350 ToReturn = Builder.CreateCall(F, Ops);
8351 Ops.pop_back();
8352 }
8353 return ToReturn;
8354 }
8355 }
8356 llvm_unreachable("unknown custom codegen type.");
8357}
8358
8359Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
8360 const CallExpr *E,
8361 ReturnValueSlot ReturnValue,
8362 llvm::Triple::ArchType Arch) {
8363 switch (BuiltinID) {
8364 default:
8365 return nullptr;
8366#include "clang/Basic/arm_cde_builtin_cg.inc"
8367 }
8368}
8369
8370static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
8371 const CallExpr *E,
8372 SmallVectorImpl<Value *> &Ops,
8373 llvm::Triple::ArchType Arch) {
8374 unsigned int Int = 0;
8375 const char *s = nullptr;
8376
8377 switch (BuiltinID) {
8378 default:
8379 return nullptr;
8380 case NEON::BI__builtin_neon_vtbl1_v:
8381 case NEON::BI__builtin_neon_vqtbl1_v:
8382 case NEON::BI__builtin_neon_vqtbl1q_v:
8383 case NEON::BI__builtin_neon_vtbl2_v:
8384 case NEON::BI__builtin_neon_vqtbl2_v:
8385 case NEON::BI__builtin_neon_vqtbl2q_v:
8386 case NEON::BI__builtin_neon_vtbl3_v:
8387 case NEON::BI__builtin_neon_vqtbl3_v:
8388 case NEON::BI__builtin_neon_vqtbl3q_v:
8389 case NEON::BI__builtin_neon_vtbl4_v:
8390 case NEON::BI__builtin_neon_vqtbl4_v:
8391 case NEON::BI__builtin_neon_vqtbl4q_v:
8392 break;
8393 case NEON::BI__builtin_neon_vtbx1_v:
8394 case NEON::BI__builtin_neon_vqtbx1_v:
8395 case NEON::BI__builtin_neon_vqtbx1q_v:
8396 case NEON::BI__builtin_neon_vtbx2_v:
8397 case NEON::BI__builtin_neon_vqtbx2_v:
8398 case NEON::BI__builtin_neon_vqtbx2q_v:
8399 case NEON::BI__builtin_neon_vtbx3_v:
8400 case NEON::BI__builtin_neon_vqtbx3_v:
8401 case NEON::BI__builtin_neon_vqtbx3q_v:
8402 case NEON::BI__builtin_neon_vtbx4_v:
8403 case NEON::BI__builtin_neon_vqtbx4_v:
8404 case NEON::BI__builtin_neon_vqtbx4q_v:
8405 break;
8406 }
8407
8408 assert(E->getNumArgs() >= 3);
8409
8410 // Get the last argument, which specifies the vector type.
8411 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
8412 Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(CGF.getContext());
8413 if (!Result)
8414 return nullptr;
8415
8416 // Determine the type of this overloaded NEON intrinsic.
8417 NeonTypeFlags Type = Result->getZExtValue();
8418 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
8419 if (!Ty)
8420 return nullptr;
8421
8422 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8423
8424 // AArch64 scalar builtins are not overloaded; they do not have an extra
8425 // argument that specifies the vector type, so we need to handle each case.
8426 switch (BuiltinID) {
8427 case NEON::BI__builtin_neon_vtbl1_v: {
8428 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
8429 Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
8430 "vtbl1");
8431 }
8432 case NEON::BI__builtin_neon_vtbl2_v: {
8433 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
8434 Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
8435 "vtbl1");
8436 }
8437 case NEON::BI__builtin_neon_vtbl3_v: {
8438 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
8439 Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
8440 "vtbl2");
8441 }
8442 case NEON::BI__builtin_neon_vtbl4_v: {
8443 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
8444 Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
8445 "vtbl2");
8446 }
8447 case NEON::BI__builtin_neon_vtbx1_v: {
8448 Value *TblRes =
8449 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
8450 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
8451
8452 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
8453 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
8454 CmpRes = Builder.CreateSExt(CmpRes, Ty);
8455
8456 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
8457 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
8458 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
8459 }
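 // Note on the compare/select sequence above: vtbx keeps the destination
 // lane when the index is out of range. A single 64-bit table holds 8
 // bytes, so lanes with index >= 8 take their value from Ops[0] and the
 // in-range lanes take the tbl1 result.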
8460 case NEON::BI__builtin_neon_vtbx2_v: {
8461 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
8462 Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
8463 "vtbx1");
8464 }
8465 case NEON::BI__builtin_neon_vtbx3_v: {
8466 Value *TblRes =
8467 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
8468 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
8469
8470 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
8471 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
8472 TwentyFourV);
8473 CmpRes = Builder.CreateSExt(CmpRes, Ty);
8474
8475 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
8476 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
8477 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
8478 }
8479 case NEON::BI__builtin_neon_vtbx4_v: {
8480 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
8481 Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
8482 "vtbx2");
8483 }
8484 case NEON::BI__builtin_neon_vqtbl1_v:
8485 case NEON::BI__builtin_neon_vqtbl1q_v:
8486 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
8487 case NEON::BI__builtin_neon_vqtbl2_v:
8488 case NEON::BI__builtin_neon_vqtbl2q_v:
8489 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
8490 case NEON::BI__builtin_neon_vqtbl3_v:
8491 case NEON::BI__builtin_neon_vqtbl3q_v:
8492 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
8493 case NEON::BI__builtin_neon_vqtbl4_v:
8494 case NEON::BI__builtin_neon_vqtbl4q_v:
8495 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
8496 case NEON::BI__builtin_neon_vqtbx1_v:
8497 case NEON::BI__builtin_neon_vqtbx1q_v:
8498 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
8499 case NEON::BI__builtin_neon_vqtbx2_v:
8500 case NEON::BI__builtin_neon_vqtbx2q_v:
8501 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
8502 case NEON::BI__builtin_neon_vqtbx3_v:
8503 case NEON::BI__builtin_neon_vqtbx3q_v:
8504 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
8505 case NEON::BI__builtin_neon_vqtbx4_v:
8506 case NEON::BI__builtin_neon_vqtbx4q_v:
8507 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
8509 }
8510
8511 if (!Int)
8512 return nullptr;
8513
8514 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
8515 return CGF.EmitNeonCall(F, Ops, s);
8516}
8517
8518Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
8519 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
8520 Op = Builder.CreateBitCast(Op, Int16Ty);
8521 Value *V = UndefValue::get(VTy);
8522 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
8523 Op = Builder.CreateInsertElement(V, Op, CI);
8524 return Op;
8525}
8526
8527/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
8528/// access builtin. Only required if it can't be inferred from the base pointer
8529/// operand.
8530llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
8531 switch (TypeFlags.getMemEltType()) {
8532 case SVETypeFlags::MemEltTyDefault:
8533 return getEltType(TypeFlags);
8534 case SVETypeFlags::MemEltTyInt8:
8535 return Builder.getInt8Ty();
8536 case SVETypeFlags::MemEltTyInt16:
8537 return Builder.getInt16Ty();
8538 case SVETypeFlags::MemEltTyInt32:
8539 return Builder.getInt32Ty();
8540 case SVETypeFlags::MemEltTyInt64:
8541 return Builder.getInt64Ty();
8542 }
8543 llvm_unreachable("Unknown MemEltType");
8544}
8545
8546llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
8547 switch (TypeFlags.getEltType()) {
8548 default:
8549 llvm_unreachable("Invalid SVETypeFlag!");
8550
8551 case SVETypeFlags::EltTyInt8:
8552 return Builder.getInt8Ty();
8553 case SVETypeFlags::EltTyInt16:
8554 return Builder.getInt16Ty();
8555 case SVETypeFlags::EltTyInt32:
8556 return Builder.getInt32Ty();
8557 case SVETypeFlags::EltTyInt64:
8558 return Builder.getInt64Ty();
8559
8560 case SVETypeFlags::EltTyFloat16:
8561 return Builder.getHalfTy();
8562 case SVETypeFlags::EltTyFloat32:
8563 return Builder.getFloatTy();
8564 case SVETypeFlags::EltTyFloat64:
8565 return Builder.getDoubleTy();
8566
8567 case SVETypeFlags::EltTyBFloat16:
8568 return Builder.getBFloatTy();
8569
8570 case SVETypeFlags::EltTyBool8:
8571 case SVETypeFlags::EltTyBool16:
8572 case SVETypeFlags::EltTyBool32:
8573 case SVETypeFlags::EltTyBool64:
8574 return Builder.getInt1Ty();
8575 }
8576}
8577
8578// Return the llvm predicate vector type corresponding to the specified element
8579// TypeFlags.
8580llvm::ScalableVectorType *
8581CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
8582 switch (TypeFlags.getEltType()) {
8583 default: llvm_unreachable("Unhandled SVETypeFlag!");
8584
8585 case SVETypeFlags::EltTyInt8:
8586 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
8587 case SVETypeFlags::EltTyInt16:
8588 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
8589 case SVETypeFlags::EltTyInt32:
8590 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
8591 case SVETypeFlags::EltTyInt64:
8592 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
8593
8594 case SVETypeFlags::EltTyBFloat16:
8595 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
8596 case SVETypeFlags::EltTyFloat16:
8597 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
8598 case SVETypeFlags::EltTyFloat32:
8599 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
8600 case SVETypeFlags::EltTyFloat64:
8601 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
8602
8603 case SVETypeFlags::EltTyBool8:
8604 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
8605 case SVETypeFlags::EltTyBool16:
8606 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
8607 case SVETypeFlags::EltTyBool32:
8608 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
8609 case SVETypeFlags::EltTyBool64:
8610 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
8611 }
8612}
8613
8614// Return the llvm vector type corresponding to the specified element TypeFlags.
8615llvm::ScalableVectorType *
8616CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
8617 switch (TypeFlags.getEltType()) {
8618 default:
8619 llvm_unreachable("Invalid SVETypeFlag!");
8620
8621 case SVETypeFlags::EltTyInt8:
8622 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
8623 case SVETypeFlags::EltTyInt16:
8624 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
8625 case SVETypeFlags::EltTyInt32:
8626 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
8627 case SVETypeFlags::EltTyInt64:
8628 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
8629
8630 case SVETypeFlags::EltTyFloat16:
8631 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
8632 case SVETypeFlags::EltTyBFloat16:
8633 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
8634 case SVETypeFlags::EltTyFloat32:
8635 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
8636 case SVETypeFlags::EltTyFloat64:
8637 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
8638
8639 case SVETypeFlags::EltTyBool8:
8640 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
8641 case SVETypeFlags::EltTyBool16:
8642 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
8643 case SVETypeFlags::EltTyBool32:
8644 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
8645 case SVETypeFlags::EltTyBool64:
8646 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
8647 }
8648}
8649
8650llvm::Value *
8651CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
8652 Function *Ptrue =
8653 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
8654 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
8655}
8656
8657constexpr unsigned SVEBitsPerBlock = 128;
8658
8659static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
8660 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
8661 return llvm::ScalableVectorType::get(EltTy, NumElts);
8662}
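// Example: with SVEBitsPerBlock = 128, an i16 element type gives
// 128 / 16 = 8 elements, i.e. <vscale x 8 x i16>.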
8663
8664// Reinterpret the input predicate so that it can be used to correctly isolate
8665// the elements of the specified datatype.
8666Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
8667 llvm::ScalableVectorType *VTy) {
8668 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
8669 if (Pred->getType() == RTy)
8670 return Pred;
8671
8672 unsigned IntID;
8673 llvm::Type *IntrinsicTy;
8674 switch (VTy->getMinNumElements()) {
8675 default:
8676 llvm_unreachable("unsupported element count!");
8677 case 2:
8678 case 4:
8679 case 8:
8680 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
8681 IntrinsicTy = RTy;
8682 break;
8683 case 16:
8684 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
8685 IntrinsicTy = Pred->getType();
8686 break;
8687 }
8688
8689 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
8690 Value *C = Builder.CreateCall(F, Pred);
8691 assert(C->getType() == RTy && "Unexpected return type!");
8692 return C;
8693}
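// Illustrative IR for the cast above (a sketch, not verbatim output):
// narrowing an svbool_t for use with 64-bit elements emits
//   %p2 = call <vscale x 2 x i1>
//         @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %p)
// and the 16-element case widens back with convert.to.svbool.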
8694
8695Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
8696 SmallVectorImpl<Value *> &Ops,
8697 unsigned IntID) {
8698 auto *ResultTy = getSVEType(TypeFlags);
8699 auto *OverloadedTy =
8700 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
8701
8702 // At the ACLE level there's only one predicate type, svbool_t, which is
8703 // mapped to <n x 16 x i1>. However, this might be incompatible with the
8704 // actual type being loaded. For example, when loading doubles (i64) the
8705 // predicate should be <n x 2 x i1> instead. At the IR level the type of
8706 // the predicate and the data being loaded must match. Cast accordingly.
8707 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
8708
8709 Function *F = nullptr;
8710 if (Ops[1]->getType()->isVectorTy())
8711 // This is the "vector base, scalar offset" case. In order to uniquely
8712 // map this built-in to an LLVM IR intrinsic, we need both the return type
8713 // and the type of the vector base.
8714 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
8715 else
8716 // This is the "scalar base, vector offset case". The type of the offset
8717 // is encoded in the name of the intrinsic. We only need to specify the
8718 // return type in order to uniquely map this built-in to an LLVM IR
8719 // intrinsic.
8720 F = CGM.getIntrinsic(IntID, OverloadedTy);
8721
8722 // Pass 0 when the offset is missing. This can only be applied when using
8723 // the "vector base" addressing mode for which ACLE allows no offset. The
8724 // corresponding LLVM IR always requires an offset.
8725 if (Ops.size() == 2) {
8726 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
8727 Ops.push_back(ConstantInt::get(Int64Ty, 0));
8728 }
8729
8730 // For "vector base, scalar index" scale the index so that it becomes a
8731 // scalar offset.
8732 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
8733 unsigned BytesPerElt =
8734 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
8735 Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
8736 Ops[2] = Builder.CreateMul(Ops[2], Scale);
8737 }
8738
8739 Value *Call = Builder.CreateCall(F, Ops);
8740
8741 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
8742 // other cases it's folded into a nop.
8743 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
8744 : Builder.CreateSExt(Call, ResultTy);
8745}
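// ACLE-level sketch of the two gather addressing modes handled above:
//   svint64_t a = svld1_gather_u64base_s64(pg, bases);         // vector base
//   svint64_t b = svld1_gather_s64offset_s64(pg, p, offsets);  // scalar base
// A vector-base form with no offset reaches LLVM with a zero offset
// appended, and a vector-base scalar *index* is multiplied by the element
// size to become a byte offset.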
8746
8747Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
8748 SmallVectorImpl<Value *> &Ops,
8749 unsigned IntID) {
8750 auto *SrcDataTy = getSVEType(TypeFlags);
8751 auto *OverloadedTy =
8752 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
8753
8754 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
8755 // it's the first argument. Move it accordingly.
8756 Ops.insert(Ops.begin(), Ops.pop_back_val());
8757
8758 Function *F = nullptr;
8759 if (Ops[2]->getType()->isVectorTy())
8760 // This is the "vector base, scalar offset" case. In order to uniquely
8761 // map this built-in to an LLVM IR intrinsic, we need both the return type
8762 // and the type of the vector base.
8763 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
8764 else
8765 // This is the "scalar base, vector offset case". The type of the offset
8766 // is encoded in the name of the intrinsic. We only need to specify the
8767 // return type in order to uniquely map this built-in to an LLVM IR
8768 // intrinsic.
8769 F = CGM.getIntrinsic(IntID, OverloadedTy);
8770
8771 // Pass 0 when the offset is missing. This can only be applied when using
8772 // the "vector base" addressing mode for which ACLE allows no offset. The
8773 // corresponding LLVM IR always requires an offset.
8774 if (Ops.size() == 3) {
8775 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
8776 Ops.push_back(ConstantInt::get(Int64Ty, 0));
8777 }
8778
8779 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
8780 // folded into a nop.
8781 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
8782
8783 // At the ACLE level there's only one predicate type, svbool_t, which is
8784 // mapped to <n x 16 x i1>. However, this might be incompatible with the
8785 // actual type being stored. For example, when storing doubles (i64) the
8786 // predicate should be <n x 2 x i1> instead. At the IR level the type of
8787 // the predicate and the data being stored must match. Cast accordingly.
8788 Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy);
8789
8790 // For "vector base, scalar index" scale the index so that it becomes a
8791 // scalar offset.
8792 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
8793 unsigned BytesPerElt =
8794 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
8795 Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
8796 Ops[3] = Builder.CreateMul(Ops[3], Scale);
8797 }
8798
8799 return Builder.CreateCall(F, Ops);
8800}
8801
8802Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
8803 SmallVectorImpl<Value *> &Ops,
8804 unsigned IntID) {
8805 // The gather prefetches are overloaded on the vector input - this can either
8806 // be the vector of base addresses or vector of offsets.
8807 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
8808 if (!OverloadedTy)
8809 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
8810
8811 // Cast the predicate from svbool_t to the right number of elements.
8812 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
8813
8814 // vector + imm addressing modes
8815 if (Ops[1]->getType()->isVectorTy()) {
8816 if (Ops.size() == 3) {
8817 // Pass 0 for 'vector+imm' when the index is omitted.
8818 Ops.push_back(ConstantInt::get(Int64Ty, 0));
8819
8820 // The sv_prfop is the last operand in the builtin and IR intrinsic.
8821 std::swap(Ops[2], Ops[3]);
8822 } else {
8823 // Index needs to be passed as scaled offset.
8824 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
8825 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
8826 Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
8827 Ops[2] = Builder.CreateMul(Ops[2], Scale);
8828 }
8829 }
8830
8831 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
8832 return Builder.CreateCall(F, Ops);
8833}
8834
8835Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
8836 SmallVectorImpl<Value *> &Ops,
8837 unsigned IntID) {
8838 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
8839 auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
8840 auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
8841
8842 unsigned N;
8843 switch (IntID) {
8844 case Intrinsic::aarch64_sve_ld2:
8845 N = 2;
8846 break;
8847 case Intrinsic::aarch64_sve_ld3:
8848 N = 3;
8849 break;
8850 case Intrinsic::aarch64_sve_ld4:
8851 N = 4;
8852 break;
8853 default:
8854 llvm_unreachable("unknown intrinsic!");
8855 }
8856 auto RetTy = llvm::VectorType::get(VTy->getElementType(),
8857 VTy->getElementCount() * N);
8858
8859 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
8860 Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
8861 Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0);
8862 BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset);
8863 BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
8864
8865 Function *F = CGM.getIntrinsic(IntID, {RetTy, Predicate->getType()});
8866 return Builder.CreateCall(F, { Predicate, BasePtr });
8867}
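// E.g. (sketch): an svld2_s32 call arrives here with the ld2 intrinsic, so
// N = 2 and RetTy is <vscale x 8 x i32>, the concatenation of the two
// <vscale x 4 x i32> part vectors the intrinsic loads.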
8868
8869Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
8870 SmallVectorImpl<Value *> &Ops,
8871 unsigned IntID) {
8872 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
8873 auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
8874 auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
8875
8876 unsigned N;
8877 switch (IntID) {
8878 case Intrinsic::aarch64_sve_st2:
8879 N = 2;
8880 break;
8881 case Intrinsic::aarch64_sve_st3:
8882 N = 3;
8883 break;
8884 case Intrinsic::aarch64_sve_st4:
8885 N = 4;
8886 break;
8887 default:
8888 llvm_unreachable("unknown intrinsic!");
8889 }
8890 auto TupleTy =
8891 llvm::VectorType::get(VTy->getElementType(), VTy->getElementCount() * N);
8892
8893 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
8894 Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
8895 Value *Offset = Ops.size() > 3 ? Ops[2] : Builder.getInt32(0);
8896 Value *Val = Ops.back();
8897 BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset);
8898 BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
8899
8900 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
8901 // need to break up the tuple vector.
8902 SmallVector<llvm::Value *, 5> Operands;
8903 Function *FExtr =
8904 CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy});
8905 for (unsigned I = 0; I < N; ++I)
8906 Operands.push_back(Builder.CreateCall(FExtr, {Val, Builder.getInt32(I)}));
8907 Operands.append({Predicate, BasePtr});
8908
8909 Function *F = CGM.getIntrinsic(IntID, { VTy });
8910 return Builder.CreateCall(F, Operands);
8911}
8912
8913// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
8914// svpmullt_pair intrinsics, with the exception that their results are bitcast
8915// to a wider type.
8916Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
8917 SmallVectorImpl<Value *> &Ops,
8918 unsigned BuiltinID) {
8919 // Splat scalar operand to vector (intrinsics with _n infix)
8920 if (TypeFlags.hasSplatOperand()) {
8921 unsigned OpNo = TypeFlags.getSplatOperand();
8922 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
8923 }
8924
8925 // The pair-wise function has a narrower overloaded type.
8926 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
8927 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
8928
8929 // Now bitcast to the wider result type.
8930 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
8931 return EmitSVEReinterpret(Call, Ty);
8932}
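// Example (sketch): svpmullb_u64 invokes the pmullb_pair intrinsic on its
// two <vscale x 4 x i32> operands and bitcasts the result to
// <vscale x 2 x i64>, the wider type the ACLE builtin returns.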
8933
8934Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
8935 ArrayRef<Value *> Ops, unsigned BuiltinID) {
8936 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
8937 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
8938 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
8939}
8940
8941Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
8942 SmallVectorImpl<Value *> &Ops,
8943 unsigned BuiltinID) {
8944 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
8945 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
8946 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
8947
8948 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
8949 Value *BasePtr = Ops[1];
8950
8951 // Implement the index operand if not omitted.
8952 if (Ops.size() > 3) {
8953 BasePtr = Builder.CreateBitCast(BasePtr, MemoryTy->getPointerTo());
8954 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
8955 }
8956
8957 // Prefetch intrinsics always expect an i8*
8958 BasePtr = Builder.CreateBitCast(BasePtr, llvm::PointerType::getUnqual(Int8Ty));
8959 Value *PrfOp = Ops.back();
8960
8961 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
8962 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
8963}
8964
8965Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
8966 llvm::Type *ReturnTy,
8967 SmallVectorImpl<Value *> &Ops,
8968 unsigned BuiltinID,
8969 bool IsZExtReturn) {
8970 QualType LangPTy = E->getArg(1)->getType();
8971 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
8972 LangPTy->castAs<PointerType>()->getPointeeType());
8973
8974 // The vector type that is returned may be different from the
8975 // eventual type loaded from memory.
8976 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
8977 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
8978
8979 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
8980 Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());
8981 Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0);
8982 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset);
8983
8984 BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
8985 Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
8986 auto *Load =
8987 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
8988 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
8989 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
8990
8991 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
8992 : Builder.CreateSExt(Load, VectorTy);
8993}
8994
8995Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
8996 SmallVectorImpl<Value *> &Ops,
8997 unsigned BuiltinID) {
8998 QualType LangPTy = E->getArg(1)->getType();
8999 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
9000 LangPTy->castAs<PointerType>()->getPointeeType());
9001
9002 // The vector type that is stored may be different from the
9003 // eventual type stored to memory.
9004 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
9005 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
9006
9007 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
9008 Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());
9009 Value *Offset = Ops.size() == 4 ? Ops[2] : Builder.getInt32(0);
9010 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset);
9011
9012 // Last value is always the data
9013 llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy);
9014
9015 BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
9016 Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
9017 auto *Store =
9018 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
9019 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
9020 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
9021 return Store;
9022}
9023
9024// Limit the amount of scalable LLVM IR generated by ACLE builtins by using
9025// the SVE dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
9026Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
9027 auto F = CGM.getIntrinsic(Intrinsic::aarch64_sve_dup_x, Ty);
9028 return Builder.CreateCall(F, Scalar);
9029}
9030
9031Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
9032 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
9033}
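// E.g. (sketch): splatting an i32 scalar becomes a call to
//   @llvm.aarch64.sve.dup.x.nxv4i32(i32 %s)
// rather than the generic insertelement + shufflevector splat pattern.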
9034
9035Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
9036 // FIXME: For big endian this needs an additional REV, or needs a separate
9037 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
9038 // instruction is defined as 'bitwise' equivalent from memory point of
9039 // view (when storing/reloading), whereas the svreinterpret builtin
9040 // implements bitwise equivalent cast from register point of view.
9041 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
9042 return Builder.CreateBitCast(Val, Ty);
9043}
9044
9045static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
9046 SmallVectorImpl<Value *> &Ops) {
9047 auto *SplatZero = Constant::getNullValue(Ty);
9048 Ops.insert(Ops.begin(), SplatZero);
9049}
9050
9051static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
9052 SmallVectorImpl<Value *> &Ops) {
9053 auto *SplatUndef = UndefValue::get(Ty);
9054 Ops.insert(Ops.begin(), SplatUndef);
9055}
9056
9057SmallVector<llvm::Type *, 2>
9058CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
9059 llvm::Type *ResultType,
9060 ArrayRef<Value *> Ops) {
9061 if (TypeFlags.isOverloadNone())
9062 return {};
9063
9064 llvm::Type *DefaultType = getSVEType(TypeFlags);
9065
9066 if (TypeFlags.isOverloadWhile())
9067 return {DefaultType, Ops[1]->getType()};
9068
9069 if (TypeFlags.isOverloadWhileRW())
9070 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
9071
9072 if (TypeFlags.isOverloadCvt() || TypeFlags.isTupleSet())
9073 return {Ops[0]->getType(), Ops.back()->getType()};
9074
9075 if (TypeFlags.isTupleCreate() || TypeFlags.isTupleGet())
9076 return {ResultType, Ops[0]->getType()};
9077
9078 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
9079 return {DefaultType};
9080}
9081
9082Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
9083 const CallExpr *E) {
9084 // Find out if any arguments are required to be integer constant expressions.
9085 unsigned ICEArguments = 0;
9086 ASTContext::GetBuiltinTypeError Error;
9087 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
9088 assert(Error == ASTContext::GE_None && "Should not codegen an error");
9089
9090 llvm::Type *Ty = ConvertType(E->getType());
9091 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
9092 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
9093 Value *Val = EmitScalarExpr(E->getArg(0));
9094 return EmitSVEReinterpret(Val, Ty);
9095 }
9096
9097 llvm::SmallVector<Value *, 4> Ops;
9098 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
9099 if ((ICEArguments & (1 << i)) == 0)
9100 Ops.push_back(EmitScalarExpr(E->getArg(i)));
9101 else {
9102 // If this is required to be a constant, constant fold it so that we know
9103 // that the generated intrinsic gets a ConstantInt.
9104 Optional<llvm::APSInt> Result =
9105 E->getArg(i)->getIntegerConstantExpr(getContext());
9106 assert(Result && "Expected argument to be a constant");
9107
9108 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
9109 // truncate because the immediate has been range checked and no valid
9110 // immediate requires more than a handful of bits.
9111 *Result = Result->extOrTrunc(32);
9112 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
9113 }
9114 }
9115
9116 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
9117 AArch64SVEIntrinsicsProvenSorted);
9118 SVETypeFlags TypeFlags(Builtin->TypeModifier);
9119 if (TypeFlags.isLoad())
9120 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
9121 TypeFlags.isZExtReturn());
9122 else if (TypeFlags.isStore())
9123 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
9124 else if (TypeFlags.isGatherLoad())
9125 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9126 else if (TypeFlags.isScatterStore())
9127 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9128 else if (TypeFlags.isPrefetch())
9129 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9130 else if (TypeFlags.isGatherPrefetch())
9131 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9132 else if (TypeFlags.isStructLoad())
9133 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9134 else if (TypeFlags.isStructStore())
9135 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9136 else if (TypeFlags.isUndef())
9137 return UndefValue::get(Ty);
9138 else if (Builtin->LLVMIntrinsic != 0) {
9139 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
9140 InsertExplicitZeroOperand(Builder, Ty, Ops);
9141
9142 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
9143 InsertExplicitUndefOperand(Builder, Ty, Ops);
9144
9145 // Some ACLE builtins leave out the argument to specify the predicate
9146 // pattern, which is expected to be expanded to an SV_ALL pattern.
9147 if (TypeFlags.isAppendSVALL())
9148 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
9149 if (TypeFlags.isInsertOp1SVALL())
9150 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
9151
9152 // Predicates must match the main datatype.
9153 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
9154 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
9155 if (PredTy->getElementType()->isIntegerTy(1))
9156 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
9157
9158 // Splat scalar operand to vector (intrinsics with _n infix)
9159 if (TypeFlags.hasSplatOperand()) {
9160 unsigned OpNo = TypeFlags.getSplatOperand();
9161 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
9162 }
9163
9164 if (TypeFlags.isReverseCompare())
9165 std::swap(Ops[1], Ops[2]);
9166
9167 if (TypeFlags.isReverseUSDOT())
9168 std::swap(Ops[1], Ops[2]);
9169
9170 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
9171 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
9172 llvm::Type *OpndTy = Ops[1]->getType();
9173 auto *SplatZero = Constant::getNullValue(OpndTy);
9174 Function *Sel = CGM.getIntrinsic(Intrinsic::aarch64_sve_sel, OpndTy);
9175 Ops[1] = Builder.CreateCall(Sel, {Ops[0], Ops[1], SplatZero});
9176 }
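 // E.g. (sketch): for svadd_s32_z(pg, a, b) this emits
 //   %a0 = call <vscale x 4 x i32> @llvm.aarch64.sve.sel.nxv4i32(
 //             <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, zeroinitializer)
 // so inactive lanes of the first operand are zeroed before the add.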
9177
9178 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
9179 getSVEOverloadTypes(TypeFlags, Ty, Ops));
9180 Value *Call = Builder.CreateCall(F, Ops);
9181
9182 // Predicate results must be converted to svbool_t.
9183 if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
9184 if (PredTy->getScalarType()->isIntegerTy(1))
9185 Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
9186
9187 return Call;
9188 }
9189
9190 switch (BuiltinID) {
9191 default:
9192 return nullptr;
9193
9194 case SVE::BI__builtin_sve_svmov_b_z: {
9195 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
9196 SVETypeFlags TypeFlags(Builtin->TypeModifier);
9197 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
9198 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
9199 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
9200 }
9201
9202 case SVE::BI__builtin_sve_svnot_b_z: {
9203 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
9204 SVETypeFlags TypeFlags(Builtin->TypeModifier);
9205 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
9206 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
9207 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
9208 }
9209
9210 case SVE::BI__builtin_sve_svmovlb_u16:
9211 case SVE::BI__builtin_sve_svmovlb_u32:
9212 case SVE::BI__builtin_sve_svmovlb_u64:
9213 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
9214
9215 case SVE::BI__builtin_sve_svmovlb_s16:
9216 case SVE::BI__builtin_sve_svmovlb_s32:
9217 case SVE::BI__builtin_sve_svmovlb_s64:
9218 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
9219
9220 case SVE::BI__builtin_sve_svmovlt_u16:
9221 case SVE::BI__builtin_sve_svmovlt_u32:
9222 case SVE::BI__builtin_sve_svmovlt_u64:
9223 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
9224
9225 case SVE::BI__builtin_sve_svmovlt_s16:
9226 case SVE::BI__builtin_sve_svmovlt_s32:
9227 case SVE::BI__builtin_sve_svmovlt_s64:
9228 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
9229
9230 case SVE::BI__builtin_sve_svpmullt_u16:
9231 case SVE::BI__builtin_sve_svpmullt_u64:
9232 case SVE::BI__builtin_sve_svpmullt_n_u16:
9233 case SVE::BI__builtin_sve_svpmullt_n_u64:
9234 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
9235
9236 case SVE::BI__builtin_sve_svpmullb_u16:
9237 case SVE::BI__builtin_sve_svpmullb_u64:
9238 case SVE::BI__builtin_sve_svpmullb_n_u16:
9239 case SVE::BI__builtin_sve_svpmullb_n_u64:
9240 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
9241
9242 case SVE::BI__builtin_sve_svdup_n_b8:
9243 case SVE::BI__builtin_sve_svdup_n_b16:
9244 case SVE::BI__builtin_sve_svdup_n_b32:
9245 case SVE::BI__builtin_sve_svdup_n_b64: {
9246 Value *CmpNE =
9247 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
9248 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
9249 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
9250 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
9251 }
9252
9253 case SVE::BI__builtin_sve_svdupq_n_b8:
9254 case SVE::BI__builtin_sve_svdupq_n_b16:
9255 case SVE::BI__builtin_sve_svdupq_n_b32:
9256 case SVE::BI__builtin_sve_svdupq_n_b64:
9257 case SVE::BI__builtin_sve_svdupq_n_u8:
9258 case SVE::BI__builtin_sve_svdupq_n_s8:
9259 case SVE::BI__builtin_sve_svdupq_n_u64:
9260 case SVE::BI__builtin_sve_svdupq_n_f64:
9261 case SVE::BI__builtin_sve_svdupq_n_s64:
9262 case SVE::BI__builtin_sve_svdupq_n_u16:
9263 case SVE::BI__builtin_sve_svdupq_n_f16:
9264 case SVE::BI__builtin_sve_svdupq_n_bf16:
9265 case SVE::BI__builtin_sve_svdupq_n_s16:
9266 case SVE::BI__builtin_sve_svdupq_n_u32:
9267 case SVE::BI__builtin_sve_svdupq_n_f32:
9268 case SVE::BI__builtin_sve_svdupq_n_s32: {
9269 // These builtins are implemented by packing the scalar operands into a
9270 // fixed-length vector and broadcasting it with dupq_lane to materialize a vector.
9271 unsigned NumOpnds = Ops.size();
9272
9273 bool IsBoolTy =
9274 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
9275
9276 // For svdupq_n_b* the element type is an integer of width 128/numelts,
9277 // so that the compare can use the width that is natural for the expected
9278 // number of predicate lanes.
9279 llvm::Type *EltTy = Ops[0]->getType();
9280 if (IsBoolTy)
9281 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
9282
9283 SmallVector<llvm::Value *, 16> VecOps;
9284 for (unsigned I = 0; I < NumOpnds; ++I)
9285 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
9286 Value *Vec = BuildVector(VecOps);
9287
9288 SVETypeFlags TypeFlags(Builtin->TypeModifier);
9289 Value *Pred = EmitSVEAllTruePred(TypeFlags);
9290
9291 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
9292 Value *InsertSubVec = Builder.CreateInsertVector(
9293 OverloadedTy, UndefValue::get(OverloadedTy), Vec, Builder.getInt64(0));
9294
9295 Function *F =
9296 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
9297 Value *DupQLane =
9298 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
9299
9300 if (!IsBoolTy)
9301 return DupQLane;
9302
9303 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
9304 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
9305 : Intrinsic::aarch64_sve_cmpne_wide,
9306 OverloadedTy);
9307 Value *Call = Builder.CreateCall(
9308 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
9309 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
9310 }
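 // Worked example (sketch): svdupq_n_b16 arrives with 8 scalar operands,
 // so EltTy becomes i16 (128/8). The operands are zero-extended and packed
 // into an <8 x i16>, broadcast with dupq_lane, then compared not-equal to
 // zero (cmpne.wide) to form the predicate result.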
9311
9312 case SVE::BI__builtin_sve_svpfalse_b:
9313 return ConstantInt::getFalse(Ty);
9314
9315 case SVE::BI__builtin_sve_svlen_bf16:
9316 case SVE::BI__builtin_sve_svlen_f16:
9317 case SVE::BI__builtin_sve_svlen_f32:
9318 case SVE::BI__builtin_sve_svlen_f64:
9319 case SVE::BI__builtin_sve_svlen_s8:
9320 case SVE::BI__builtin_sve_svlen_s16:
9321 case SVE::BI__builtin_sve_svlen_s32:
9322 case SVE::BI__builtin_sve_svlen_s64:
9323 case SVE::BI__builtin_sve_svlen_u8:
9324 case SVE::BI__builtin_sve_svlen_u16:
9325 case SVE::BI__builtin_sve_svlen_u32:
9326 case SVE::BI__builtin_sve_svlen_u64: {
9327 SVETypeFlags TF(Builtin->TypeModifier);
9328 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
9329 auto *NumEls =
9330 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
9331
9332 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
9333 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
9334 }
9335
9336 case SVE::BI__builtin_sve_svtbl2_u8:
9337 case SVE::BI__builtin_sve_svtbl2_s8:
9338 case SVE::BI__builtin_sve_svtbl2_u16:
9339 case SVE::BI__builtin_sve_svtbl2_s16:
9340 case SVE::BI__builtin_sve_svtbl2_u32:
9341 case SVE::BI__builtin_sve_svtbl2_s32:
9342 case SVE::BI__builtin_sve_svtbl2_u64:
9343 case SVE::BI__builtin_sve_svtbl2_s64:
9344 case SVE::BI__builtin_sve_svtbl2_f16:
9345 case SVE::BI__builtin_sve_svtbl2_bf16:
9346 case SVE::BI__builtin_sve_svtbl2_f32:
9347 case SVE::BI__builtin_sve_svtbl2_f64: {
9348 SVETypeFlags TF(Builtin->TypeModifier);
9349 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
9350 auto TupleTy = llvm::VectorType::getDoubleElementsVectorType(VTy);
9351 Function *FExtr =
9352 CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy});
9353 Value *V0 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(0)});
9354 Value *V1 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(1)});
9355 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
9356 return Builder.CreateCall(F, {V0, V1, Ops[1]});
9357 }
9358
9359 case SVE::BI__builtin_sve_svset_neonq_s8:
9360 case SVE::BI__builtin_sve_svset_neonq_s16:
9361 case SVE::BI__builtin_sve_svset_neonq_s32:
9362 case SVE::BI__builtin_sve_svset_neonq_s64:
9363 case SVE::BI__builtin_sve_svset_neonq_u8:
9364 case SVE::BI__builtin_sve_svset_neonq_u16:
9365 case SVE::BI__builtin_sve_svset_neonq_u32:
9366 case SVE::BI__builtin_sve_svset_neonq_u64:
9367 case SVE::BI__builtin_sve_svset_neonq_f16:
9368 case SVE::BI__builtin_sve_svset_neonq_f32:
9369 case SVE::BI__builtin_sve_svset_neonq_f64:
9370 case SVE::BI__builtin_sve_svset_neonq_bf16: {
9371 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
9372 }
9373
9374 case SVE::BI__builtin_sve_svget_neonq_s8:
9375 case SVE::BI__builtin_sve_svget_neonq_s16:
9376 case SVE::BI__builtin_sve_svget_neonq_s32:
9377 case SVE::BI__builtin_sve_svget_neonq_s64:
9378 case SVE::BI__builtin_sve_svget_neonq_u8:
9379 case SVE::BI__builtin_sve_svget_neonq_u16:
9380 case SVE::BI__builtin_sve_svget_neonq_u32:
9381 case SVE::BI__builtin_sve_svget_neonq_u64:
9382 case SVE::BI__builtin_sve_svget_neonq_f16:
9383 case SVE::BI__builtin_sve_svget_neonq_f32:
9384 case SVE::BI__builtin_sve_svget_neonq_f64:
9385 case SVE::BI__builtin_sve_svget_neonq_bf16: {
9386 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
9387 }
9388
9389 case SVE::BI__builtin_sve_svdup_neonq_s8:
9390 case SVE::BI__builtin_sve_svdup_neonq_s16:
9391 case SVE::BI__builtin_sve_svdup_neonq_s32:
9392 case SVE::BI__builtin_sve_svdup_neonq_s64:
9393 case SVE::BI__builtin_sve_svdup_neonq_u8:
9394 case SVE::BI__builtin_sve_svdup_neonq_u16:
9395 case SVE::BI__builtin_sve_svdup_neonq_u32:
9396 case SVE::BI__builtin_sve_svdup_neonq_u64:
9397 case SVE::BI__builtin_sve_svdup_neonq_f16:
9398 case SVE::BI__builtin_sve_svdup_neonq_f32:
9399 case SVE::BI__builtin_sve_svdup_neonq_f64:
9400 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
9401 Value *Insert = Builder.CreateInsertVector(Ty, UndefValue::get(Ty), Ops[0],
9402 Builder.getInt64(0));
9403 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
9404 {Insert, Builder.getInt64(0)});
9405 }
9406 }
9407
9408 // Should not happen.
9409 return nullptr;
9410}
9411
9412Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
9413 const CallExpr *E,
9414 llvm::Triple::ArchType Arch) {
9415 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
9416 BuiltinID <= clang::AArch64::LastSVEBuiltin)
9417 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
9418
9419 unsigned HintID = static_cast<unsigned>(-1);
9420 switch (BuiltinID) {
9421 default: break;
9422 case clang::AArch64::BI__builtin_arm_nop:
9423 HintID = 0;
9424 break;
9425 case clang::AArch64::BI__builtin_arm_yield:
9426 case clang::AArch64::BI__yield:
9427 HintID = 1;
9428 break;
9429 case clang::AArch64::BI__builtin_arm_wfe:
9430 case clang::AArch64::BI__wfe:
9431 HintID = 2;
9432 break;
9433 case clang::AArch64::BI__builtin_arm_wfi:
9434 case clang::AArch64::BI__wfi:
9435 HintID = 3;
9436 break;
9437 case clang::AArch64::BI__builtin_arm_sev:
9438 case clang::AArch64::BI__sev:
9439 HintID = 4;
9440 break;
9441 case clang::AArch64::BI__builtin_arm_sevl:
9442 case clang::AArch64::BI__sevl:
9443 HintID = 5;
9444 break;
9445 }
9446
9447 if (HintID != static_cast<unsigned>(-1)) {
9448 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
9449 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
9450 }
9451
9452 if (BuiltinID == clang::AArch64::BI__builtin_arm_prefetch) {
9453 Value *Address = EmitScalarExpr(E->getArg(0));
9454 Value *RW = EmitScalarExpr(E->getArg(1));
9455 Value *CacheLevel = EmitScalarExpr(E->getArg(2));
9456 Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
9457 Value *IsData = EmitScalarExpr(E->getArg(4));
9458
9459 Value *Locality = nullptr;
9460 if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
9461 // Temporal fetch, needs to convert cache level to locality.
9462 Locality = llvm::ConstantInt::get(Int32Ty,
9463 -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
9464 } else {
9465 // Streaming fetch.
9466 Locality = llvm::ConstantInt::get(Int32Ty, 0);
9467 }
9468
9469 // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
9470 // PLDL3STRM or PLDL2STRM.
9471 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
9472 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
9473 }
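 // Worked example (sketch): a temporal data-read prefetch targeting L2
 // (CacheLevel 1, RetentionPolicy 0) maps to LLVM locality 3 - 1 = 2:
 //   @llvm.prefetch(%addr, /*rw=*/0, /*locality=*/2, /*data=*/1)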
9474
9475 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
9476 assert((getContext().getTypeSize(E->getType()) == 32) &&
9477 "rbit of unusual size!");
9478 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9479 return Builder.CreateCall(
9480 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
9481 }
9482 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
9483 assert((getContext().getTypeSize(E->getType()) == 64) &&
9484 "rbit of unusual size!");
9485 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9486 return Builder.CreateCall(
9487 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
9488 }
9489
9490 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
9491 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9492 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
9493 "cls");
9494 }
9495 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
9496 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9497 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
9498 "cls");
9499 }
9500
9501 if (BuiltinID == clang::AArch64::BI__builtin_arm_frint32zf ||
9502 BuiltinID == clang::AArch64::BI__builtin_arm_frint32z) {
9503 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9504 llvm::Type *Ty = Arg->getType();
9505 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
9506 Arg, "frint32z");
9507 }
9508
9509 if (BuiltinID == clang::AArch64::BI__builtin_arm_frint64zf ||
9510 BuiltinID == clang::AArch64::BI__builtin_arm_frint64z) {
9511 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9512 llvm::Type *Ty = Arg->getType();
9513 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
9514 Arg, "frint64z");
9515 }
9516
9517 if (BuiltinID == clang::AArch64::BI__builtin_arm_frint32xf ||
9518 BuiltinID == clang::AArch64::BI__builtin_arm_frint32x) {
9519 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9520 llvm::Type *Ty = Arg->getType();
9521 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
9522 Arg, "frint32x");
9523 }
9524
9525 if (BuiltinID == clang::AArch64::BI__builtin_arm_frint64xf ||
9526 BuiltinID == clang::AArch64::BI__builtin_arm_frint64x) {
9527 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9528 llvm::Type *Ty = Arg->getType();
9529 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
9530 Arg, "frint64x");
9531 }
9532
9533 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
9534 assert((getContext().getTypeSize(E->getType()) == 32) &&
9535 "__jcvt of unusual size!");
9536 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9537 return Builder.CreateCall(
9538 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
9539 }
9540
9541 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
9542 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
9543 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
9544 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
9545 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
9546 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
9547
9548 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
9549 // Load from the address via an LLVM intrinsic, receiving a
9550 // tuple of 8 i64 words, and store each one to ValPtr.
9551 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
9552 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
9553 llvm::Value *ToRet;
9554 for (size_t i = 0; i < 8; i++) {
9555 llvm::Value *ValOffsetPtr =
9556 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
9557 Address Addr =
9558 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
9559 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
9560 }
9561 return ToRet;
9562 } else {
9563 // Load 8 i64 words from ValPtr, and store them to the address
9564 // via an LLVM intrinsic.
9565 llvm::SmallVector<llvm::Value *, 9> Args;
9566 Args.push_back(MemAddr);
9567 for (size_t i = 0; i < 8; i++) {
9568 llvm::Value *ValOffsetPtr =
9569 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
9570 Address Addr =
9571 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
9572 Args.push_back(Builder.CreateLoad(Addr));
9573 }
9574
9575 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
9576 ? Intrinsic::aarch64_st64b
9577 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
9578 ? Intrinsic::aarch64_st64bv
9579 : Intrinsic::aarch64_st64bv0);
9580 Function *F = CGM.getIntrinsic(Intr);
9581 return Builder.CreateCall(F, Args);
9582 }
9583 }
9584
9585 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
9586 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
9587
9588 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
9589 ? Intrinsic::aarch64_rndr
9590 : Intrinsic::aarch64_rndrrs);
9591 Function *F = CGM.getIntrinsic(Intr);
9592 llvm::Value *Val = Builder.CreateCall(F);
9593 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
9594 Value *Status = Builder.CreateExtractValue(Val, 1);
9595
9596 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
9597 Builder.CreateStore(RandomValue, MemAddress);
9598 Status = Builder.CreateZExt(Status, Int32Ty);
9599 return Status;
9600 }
9601
9602 if (BuiltinID == clang::AArch64::BI__clear_cache) {
9603 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
9604 const FunctionDecl *FD = E->getDirectCallee();
9605 Value *Ops[2];
9606 for (unsigned i = 0; i < 2; i++)
9607 Ops[i] = EmitScalarExpr(E->getArg(i));
9608 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
9609 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
9610 StringRef Name = FD->getName();
9611 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
9612 }
9613
9614 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
9615 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
9616 getContext().getTypeSize(E->getType()) == 128) {
9617 Function *F =
9618 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
9619 ? Intrinsic::aarch64_ldaxp
9620 : Intrinsic::aarch64_ldxp);
9621
9622 Value *LdPtr = EmitScalarExpr(E->getArg(0));
9623 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
9624 "ldxp");
9625
9626 Value *Val0 = Builder.CreateExtractValue(Val, 1);
9627 Value *Val1 = Builder.CreateExtractValue(Val, 0);
9628 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
9629 Val0 = Builder.CreateZExt(Val0, Int128Ty);
9630 Val1 = Builder.CreateZExt(Val1, Int128Ty);
9631
9632 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
9633 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
9634 Val = Builder.CreateOr(Val, Val1);
9635 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
9636 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
9637 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
9638 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
9639
9640 QualType Ty = E->getType();
9641 llvm::Type *RealResTy = ConvertType(Ty);
9642 llvm::Type *IntTy =
9643 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9644 llvm::Type *PtrTy = IntTy->getPointerTo();
9645 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
9646
9647 Function *F =
9648 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
9649 ? Intrinsic::aarch64_ldaxr
9650 : Intrinsic::aarch64_ldxr,
9651 PtrTy);
9652 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
9653 Val->addParamAttr(
9654 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
9655
9656 if (RealResTy->isPointerTy())
9657 return Builder.CreateIntToPtr(Val, RealResTy);
9658
9659 llvm::Type *IntResTy = llvm::IntegerType::get(
9660 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
9661 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
9662 RealResTy);
9663 }
9664
9665 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
9666 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
9667 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
9668 Function *F =
9669 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
9670 ? Intrinsic::aarch64_stlxp
9671 : Intrinsic::aarch64_stxp);
9672 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
9673
9674 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
9675 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
9676
9677 Tmp = Builder.CreateElementBitCast(Tmp, STy);
9678 llvm::Value *Val = Builder.CreateLoad(Tmp);
9679
9680 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
9681 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
9682 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
9683 Int8PtrTy);
9684 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
9685 }
9686
9687 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
9688 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
9689 Value *StoreVal = EmitScalarExpr(E->getArg(0));
9690 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
9691
9692 QualType Ty = E->getArg(0)->getType();
9693 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
9694 getContext().getTypeSize(Ty));
9695 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
9696
9697 if (StoreVal->getType()->isPointerTy())
9698 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
9699 else {
9700 llvm::Type *IntTy = llvm::IntegerType::get(
9702 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
9703 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
9704 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
9705 }
9706
9707 Function *F =
9708 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
9709 ? Intrinsic::aarch64_stlxr
9710 : Intrinsic::aarch64_stxr,
9711 StoreAddr->getType());
9712 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
9713 CI->addParamAttr(
9714 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
9715 return CI;
9716 }
9717
9718 if (BuiltinID == clang::AArch64::BI__getReg) {
9719 Expr::EvalResult Result;
9720 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
9721 llvm_unreachable("Sema will ensure that the parameter is constant");
9722
9723 llvm::APSInt Value = Result.Val.getInt();
9724 LLVMContext &Context = CGM.getLLVMContext();
9725 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
9726
9727 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
9728 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
9729 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
9730
9731 llvm::Function *F =
9732 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
9733 return Builder.CreateCall(F, Metadata);
9734 }
9735
9736 if (BuiltinID == clang::AArch64::BI__break) {
9737 Expr::EvalResult Result;
9738 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
9739 llvm_unreachable("Sema will ensure that the parameter is constant");
9740
9741 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
9742 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
9743 }
9744
9745 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
9746 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
9747 return Builder.CreateCall(F);
9748 }
9749
9750 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
9751 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
9752 llvm::SyncScope::SingleThread);
9753
9754 // CRC32
9755 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
9756 switch (BuiltinID) {
9757 case clang::AArch64::BI__builtin_arm_crc32b:
9758 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
9759 case clang::AArch64::BI__builtin_arm_crc32cb:
9760 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
9761 case clang::AArch64::BI__builtin_arm_crc32h:
9762 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
9763 case clang::AArch64::BI__builtin_arm_crc32ch:
9764 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
9765 case clang::AArch64::BI__builtin_arm_crc32w:
9766 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
9767 case clang::AArch64::BI__builtin_arm_crc32cw:
9768 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
9769 case clang::AArch64::BI__builtin_arm_crc32d:
9770 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
9771 case clang::AArch64::BI__builtin_arm_crc32cd:
9772 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
9773 }
9774
9775 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
9776 Value *Arg0 = EmitScalarExpr(E->getArg(0));
9777 Value *Arg1 = EmitScalarExpr(E->getArg(1));
9778 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9779
9780 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
9781 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
9782
9783 return Builder.CreateCall(F, {Arg0, Arg1});
9784 }
9785
9786 // Memory Operations (MOPS)
9787 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
9788 Value *Dst = EmitScalarExpr(E->getArg(0));
9789 Value *Val = EmitScalarExpr(E->getArg(1));
9790 Value *Size = EmitScalarExpr(E->getArg(2));
9791 Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
9792 Val = Builder.CreateTrunc(Val, Int8Ty);
9793 Size = Builder.CreateIntCast(Size, Int64Ty, false);
9794 return Builder.CreateCall(
9795 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
9796 }
9797
9798 // Memory Tagging Extensions (MTE) Intrinsics
9799 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
9800 switch (BuiltinID) {
9801 case clang::AArch64::BI__builtin_arm_irg:
9802 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
9803 case clang::AArch64::BI__builtin_arm_addg:
9804 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
9805 case clang::AArch64::BI__builtin_arm_gmi:
9806 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
9807 case clang::AArch64::BI__builtin_arm_ldg:
9808 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
9809 case clang::AArch64::BI__builtin_arm_stg:
9810 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
9811 case clang::AArch64::BI__builtin_arm_subp:
9812 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
9813 }
9814
9815 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
9816 llvm::Type *T = ConvertType(E->getType());
9817
9818 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
9819 Value *Pointer = EmitScalarExpr(E->getArg(0));
9820 Value *Mask = EmitScalarExpr(E->getArg(1));
9821
9822 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
9823 Mask = Builder.CreateZExt(Mask, Int64Ty);
9824 Value *RV = Builder.CreateCall(
9825 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
9826 return Builder.CreatePointerCast(RV, T);
9827 }
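// For illustration, __builtin_arm_irg(p, m) becomes roughly:
//   %t = call i8* @llvm.aarch64.irg(i8* %p, i64 %m)
// followed by a cast of %t back to the builtin's result pointer type T.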
9828 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
9829 Value *Pointer = EmitScalarExpr(E->getArg(0));
9830 Value *TagOffset = EmitScalarExpr(E->getArg(1));
9831
9832 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
9833 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
9834 Value *RV = Builder.CreateCall(
9835 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
9836 return Builder.CreatePointerCast(RV, T);
9837 }
9838 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
9839 Value *Pointer = EmitScalarExpr(E->getArg(0));
9840 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
9841
9842 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
9843 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
9844 return Builder.CreateCall(
9845 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
9846 }
9847 // Although it is possible to supply a different return
9848 // address (the first argument) to this intrinsic, for now we
9849 // use the input address as the return address as well.
9850 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
9851 Value *TagAddress = EmitScalarExpr(E->getArg(0));
9852 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
9853 Value *RV = Builder.CreateCall(
9854 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
9855 return Builder.CreatePointerCast(RV, T);
9856 }
9857 // Although it is possible to supply a different tag to set
9858 // (as the first argument) to this intrinsic, for now we supply
9859 // the tag carried in the input address argument (the common use case).
9860 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
9861 Value *TagAddress = EmitScalarExpr(E->getArg(0));
9862 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
9863 return Builder.CreateCall(
9864 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
9865 }
9866 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
9867 Value *PointerA = EmitScalarExpr(E->getArg(0));
9868 Value *PointerB = EmitScalarExpr(E->getArg(1));
9869 PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
9870 PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
9871 return Builder.CreateCall(
9872 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
9873 }
9874 }
9875
9876 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
9877 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
9878 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
9879 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
9880 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
9881 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
9882
9883 SpecialRegisterAccessKind AccessKind = Write;
9884 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
9885 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
9886 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
9887 AccessKind = VolatileRead;
9888
9889 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
9890 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
9891
9892 bool Is64Bit = BuiltinID != clang::AArch64::BI__builtin_arm_rsr &&
9893 BuiltinID != clang::AArch64::BI__builtin_arm_wsr;
9894
9895 llvm::Type *ValueType;
9896 llvm::Type *RegisterType = Int64Ty;
9897 if (IsPointerBuiltin) {
9898 ValueType = VoidPtrTy;
9899 } else if (Is64Bit) {
9900 ValueType = Int64Ty;
9901 } else {
9902 ValueType = Int32Ty;
9903 }
9904
9905 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
9906 AccessKind);
9907 }
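// For example, __builtin_arm_rsr64("tpidr_el0") reads the named register
// through llvm.read_register.i64; the 32-bit and pointer variants reuse the
// same i64 register access and differ only in the surrounding value type.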
9908
9909 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
9910 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
9911 LLVMContext &Context = CGM.getLLVMContext();
9912
9913 unsigned SysReg =
9914 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
9915
9916 std::string SysRegStr;
9917 llvm::raw_string_ostream(SysRegStr) <<
9918 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
9919 ((SysReg >> 11) & 7) << ":" <<
9920 ((SysReg >> 7) & 15) << ":" <<
9921 ((SysReg >> 3) & 15) << ":" <<
9922 ( SysReg & 7);
9923
9924 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
9925 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
9926 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
9927
9928 llvm::Type *RegisterType = Int64Ty;
9929 llvm::Type *Types[] = { RegisterType };
9930
9931 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
9932 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
9933
9934 return Builder.CreateCall(F, Metadata);
9935 }
9936
9937 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
9938 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
9939
9940 return Builder.CreateCall(F, { Metadata, ArgValue });
9941 }
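// The string built above follows the "op0:op1:CRn:CRm:op2" form the backend
// expects for generic system registers; op0 is reconstructed as 0b10 | bit
// 14 of the MSVC encoding, so e.g. an all-zero encoding prints as
// "2:0:0:0:0".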
9942
9943 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
9944 llvm::Function *F =
9945 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
9946 return Builder.CreateCall(F);
9947 }
9948
9949 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
9950 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
9951 return Builder.CreateCall(F);
9952 }
9953
9954 if (BuiltinID == clang::AArch64::BI__mulh ||
9955 BuiltinID == clang::AArch64::BI__umulh) {
9956 llvm::Type *ResType = ConvertType(E->getType());
9957 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
9958
9959 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
9960 Value *LHS =
9961 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
9962 Value *RHS =
9963 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
9964
9965 Value *MulResult, *HigherBits;
9966 if (IsSigned) {
9967 MulResult = Builder.CreateNSWMul(LHS, RHS);
9968 HigherBits = Builder.CreateAShr(MulResult, 64);
9969 } else {
9970 MulResult = Builder.CreateNUWMul(LHS, RHS);
9971 HigherBits = Builder.CreateLShr(MulResult, 64);
9972 }
9973 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
9974
9975 return HigherBits;
9976 }
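// Conceptually, the unsigned flavour computes, e.g.:
//   uint64_t __umulh(uint64_t a, uint64_t b) {
//     return (uint64_t)(((unsigned __int128)a * b) >> 64);
//   }
// i.e. widen to 128 bits, multiply, and keep the high 64 bits.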
9977
9978 if (BuiltinID == AArch64::BI__writex18byte ||
9979 BuiltinID == AArch64::BI__writex18word ||
9980 BuiltinID == AArch64::BI__writex18dword ||
9981 BuiltinID == AArch64::BI__writex18qword) {
9982 llvm::Type *IntTy = ConvertType(E->getArg(1)->getType());
9983
9984 // Read x18 as i8*
9985 LLVMContext &Context = CGM.getLLVMContext();
9986 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
9987 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
9988 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
9989 llvm::Function *F =
9990 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
9991 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
9992 X18 = Builder.CreateIntToPtr(X18, llvm::PointerType::get(Int8Ty, 0));
9993
9994 // Store val at x18 + offset
9995 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
9996 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
9997 Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0));
9998 Value *Val = EmitScalarExpr(E->getArg(1));
9999 StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());
10000 return Store;
10001 }
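// For example, __writex18byte(off, v) behaves roughly like
//   *(unsigned char *)((char *)x18 + off) = v;
// with x18 read via llvm.read_register and the store emitted with byte
// alignment.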
10002
10003 if (BuiltinID == AArch64::BI__readx18byte ||
10004 BuiltinID == AArch64::BI__readx18word ||
10005 BuiltinID == AArch64::BI__readx18dword ||
10006 BuiltinID == AArch64::BI__readx18qword) {
10007 llvm::Type *IntTy = ConvertType(E->getType());
10008
10009 // Read x18 as i8*
10010 LLVMContext &Context = CGM.getLLVMContext();
10011 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
10012 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
10013 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
10014 llvm::Function *F =
10015 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
10016 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
10017 X18 = Builder.CreateIntToPtr(X18, llvm::PointerType::get(Int8Ty, 0));
10018
10019 // Load x18 + offset
10020 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
10021 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
10022 Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0));
10023 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
10024 return Load;
10025 }
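// The read side mirrors the stores above; e.g. __readx18byte(off) is
// roughly *(unsigned char *)((char *)x18 + off), a 1-byte-aligned load.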
10026
10027 // Handle MSVC intrinsics before argument evaluation to prevent double
10028 // evaluation.
10029 if (Optional<MSVCIntrin> MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID))
10030 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
10031
10032 // Find out if any arguments are required to be integer constant
10033 // expressions.
10034 unsigned ICEArguments = 0;
10035 ASTContext::GetBuiltinTypeError Error;
10036 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10037 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10038
10039 llvm::SmallVector<Value*, 4> Ops;
10040 Address PtrOp0 = Address::invalid();
10041 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
10042 if (i == 0) {
10043 switch (BuiltinID) {
10044 case NEON::BI__builtin_neon_vld1_v:
10045 case NEON::BI__builtin_neon_vld1q_v:
10046 case NEON::BI__builtin_neon_vld1_dup_v:
10047 case NEON::BI__builtin_neon_vld1q_dup_v:
10048 case NEON::BI__builtin_neon_vld1_lane_v:
10049 case NEON::BI__builtin_neon_vld1q_lane_v:
10050 case NEON::BI__builtin_neon_vst1_v:
10051 case NEON::BI__builtin_neon_vst1q_v:
10052 case NEON::BI__builtin_neon_vst1_lane_v:
10053 case NEON::BI__builtin_neon_vst1q_lane_v:
10054 // Get the alignment for the argument in addition to the value;
10055 // we'll use it later.
10056 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
10057 Ops.push_back(PtrOp0.getPointer());
10058 continue;
10059 }
10060 }
10061 if ((ICEArguments & (1 << i)) == 0) {
10062 Ops.push_back(EmitScalarExpr(E->getArg(i)));
10063 } else {
10064 // If this is required to be a constant, constant fold it so that we know
10065 // that the generated intrinsic gets a ConstantInt.
10066 Ops.push_back(llvm::ConstantInt::get(
10067 getLLVMContext(),
10068 *E->getArg(i)->getIntegerConstantExpr(getContext())));
10069 }
10070 }
10071
10072 auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
10073 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
10074 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
10075
10076 if (Builtin) {
10077 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
10078 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
10079 assert(Result && "SISD intrinsic should have been handled");
10080 return Result;
10081 }
10082
10083 const Expr *Arg = E->getArg(E->getNumArgs()-1);
10084 NeonTypeFlags Type(0);
10085 if (Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(getContext()))
10086 // Determine the type of this overloaded NEON intrinsic.
10087 Type = NeonTypeFlags(Result->getZExtValue());
10088
10089 bool usgn = Type.isUnsigned();
10090 bool quad = Type.isQuad();
10091
10092 // Handle non-overloaded intrinsics first.
10093 switch (BuiltinID) {
10094 default: break;
10095 case NEON::BI__builtin_neon_vabsh_f16:
10096 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10097 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
10098 case NEON::BI__builtin_neon_vaddq_p128: {
10099 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
10100 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10101 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
10102 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
10103 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
10104 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
10105 return Builder.CreateBitCast(Ops[0], Int128Ty);
10106 }
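// Addition of poly128 values is carry-less, so the sum is simply the XOR of
// the operands: vaddq_p128(a, b) is a ^ b, reinterpreted as i128.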
10107 case NEON::BI__builtin_neon_vldrq_p128: {
10108 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
10109 llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
10110 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
10111 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
10112 CharUnits::fromQuantity(16));
10113 }
10114 case NEON::BI__builtin_neon_vstrq_p128: {
10115 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
10116 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
10117 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
10118 }
10119 case NEON::BI__builtin_neon_vcvts_f32_u32:
10120 case NEON::BI__builtin_neon_vcvtd_f64_u64:
10121 usgn = true;
10122 LLVM_FALLTHROUGH;
10123 case NEON::BI__builtin_neon_vcvts_f32_s32:
10124 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
10125 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10126 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
10127 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
10128 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
10129 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
10130 if (usgn)
10131 return Builder.CreateUIToFP(Ops[0], FTy);
10132 return Builder.CreateSIToFP(Ops[0], FTy);
10133 }
10134 case NEON::BI__builtin_neon_vcvth_f16_u16:
10135 case NEON::BI__builtin_neon_vcvth_f16_u32:
10136 case NEON::BI__builtin_neon_vcvth_f16_u64:
10137 usgn = true;
10138 LLVM_FALLTHROUGH;
10139 case NEON::BI__builtin_neon_vcvth_f16_s16:
10140 case NEON::BI__builtin_neon_vcvth_f16_s32:
10141 case NEON::BI__builtin_neon_vcvth_f16_s64: {
10142 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10143 llvm::Type *FTy = HalfTy;
10144 llvm::Type *InTy;
10145 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
10146 InTy = Int64Ty;
10147 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
10148 InTy = Int32Ty;
10149 else
10150 InTy = Int16Ty;
10151 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
10152 if (usgn)
10153 return Builder.CreateUIToFP(Ops[0], FTy);
10154 return Builder.CreateSIToFP(Ops[0], FTy);
10155 }
10156 case NEON::BI__builtin_neon_vcvtah_u16_f16:
10157 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
10158 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
10159 case NEON::BI__builtin_neon_vcvtph_u16_f16:
10160 case NEON::BI__builtin_neon_vcvth_u16_f16:
10161 case NEON::BI__builtin_neon_vcvtah_s16_f16:
10162 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
10163 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
10164 case NEON::BI__builtin_neon_vcvtph_s16_f16:
10165 case NEON::BI__builtin_neon_vcvth_s16_f16: {
10166 unsigned Int;
10167 llvm::Type* InTy = Int32Ty;
10168 llvm::Type* FTy = HalfTy;
10169 llvm::Type *Tys[2] = {InTy, FTy};
10170 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10171 switch (BuiltinID) {
10172 default: llvm_unreachable("missing builtin ID in switch!");
10173 case NEON::BI__builtin_neon_vcvtah_u16_f16:
10174 Int = Intrinsic::aarch64_neon_fcvtau; break;
10175 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
10176 Int = Intrinsic::aarch64_neon_fcvtmu; break;
10177 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
10178 Int = Intrinsic::aarch64_neon_fcvtnu; break;
10179 case NEON::BI__builtin_neon_vcvtph_u16_f16:
10180 Int = Intrinsic::aarch64_neon_fcvtpu; break;
10181 case NEON::BI__builtin_neon_vcvth_u16_f16:
10182 Int = Intrinsic::aarch64_neon_fcvtzu; break;
10183 case NEON::BI__builtin_neon_vcvtah_s16_f16:
10184 Int = Intrinsic::aarch64_neon_fcvtas; break;
10185 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
10186 Int = Intrinsic::aarch64_neon_fcvtms; break;
10187 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
10188 Int = Intrinsic::aarch64_neon_fcvtns; break;
10189 case NEON::BI__builtin_neon_vcvtph_s16_f16:
10190 Int = Intrinsic::aarch64_neon_fcvtps; break;
10191 case NEON::BI__builtin_neon_vcvth_s16_f16:
10192 Int = Intrinsic::aarch64_neon_fcvtzs; break;
10193 }
10194 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
10195 return Builder.CreateTrunc(Ops[0], Int16Ty);
10196 }
10197 case NEON::BI__builtin_neon_vcaleh_f16:
10198 case NEON::BI__builtin_neon_vcalth_f16:
10199 case NEON::BI__builtin_neon_vcageh_f16:
10200 case NEON::BI__builtin_neon_vcagth_f16: {
10201 unsigned Int;
10202 llvm::Type* InTy = Int32Ty;
10203 llvm::Type* FTy = HalfTy;
10204 llvm::Type *Tys[2] = {InTy, FTy};
10205 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10206 switch (BuiltinID) {
10207 default: llvm_unreachable("missing builtin ID in switch!");
10208 case NEON::BI__builtin_neon_vcageh_f16:
10209 Int = Intrinsic::aarch64_neon_facge; break;
10210 case NEON::BI__builtin_neon_vcagth_f16:
10211 Int = Intrinsic::aarch64_neon_facgt; break;
10212 case NEON::BI__builtin_neon_vcaleh_f16:
10213 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
10214 case NEON::BI__builtin_neon_vcalth_f16:
10215 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
10216 }
10217 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
10218 return Builder.CreateTrunc(Ops[0], Int16Ty);
10219 }
10220 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
10221 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
10222 unsigned Int;
10223 llvm::Type* InTy = Int32Ty;
10224 llvm::Type* FTy = HalfTy;
10225 llvm::Type *Tys[2] = {InTy, FTy};
10226 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10227 switch (BuiltinID) {
10228 default: llvm_unreachable("missing builtin ID in switch!");
10229 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
10230 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
10231 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
10232 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
10233 }
10234 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
10235 return Builder.CreateTrunc(Ops[0], Int16Ty);
10236 }
10237 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
10238 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
10239 unsigned Int;
10240 llvm::Type* FTy = HalfTy;
10241 llvm::Type* InTy = Int32Ty;
10242 llvm::Type *Tys[2] = {FTy, InTy};
10243 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10244 switch (BuiltinID) {
10245 default: llvm_unreachable("missing builtin ID in switch!");
10246 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
10247 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
10248 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
10249 break;
10250 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
10251 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
10252 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
10253 break;
10254 }
10255 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
10256 }
10257 case NEON::BI__builtin_neon_vpaddd_s64: {
10258 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
10259 Value *Vec = EmitScalarExpr(E->getArg(0));
10260 // The vector is v2i64, so make sure it's bitcast to that.
10261 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
10262 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
10263 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
10264 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
10265 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
10266 // Pairwise addition of a v2i64 into a scalar i64.
10267 return Builder.CreateAdd(Op0, Op1, "vpaddd");
10268 }
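// e.g. for a <2 x i64> input {a, b}, both lanes are extracted and the
// scalar result is a + b; the f64/f32 cases below do the same with fadd.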
10269 case NEON::BI__builtin_neon_vpaddd_f64: {
10270 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
10271 Value *Vec = EmitScalarExpr(E->getArg(0));
10272 // The vector is v2f64, so make sure it's bitcast to that.
10273 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
10274 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
10275 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
10276 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
10277 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
10278 // Pairwise addition of a v2f64 into a scalar f64.
10279 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
10280 }
10281 case NEON::BI__builtin_neon_vpadds_f32: {
10282 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
10283 Value *Vec = EmitScalarExpr(E->getArg(0));
10284 // The vector is v2f32, so make sure it's bitcast to that.
10285 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
10286 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
10287 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
10288 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
10289 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
10290 // Pairwise addition of a v2f32 into a scalar f32.
10291 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
10292 }
10293 case NEON::BI__builtin_neon_vceqzd_s64:
10294 case NEON::BI__builtin_neon_vceqzd_f64:
10295 case NEON::BI__builtin_neon_vceqzs_f32:
10296 case NEON::BI__builtin_neon_vceqzh_f16:
10297 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10298 return EmitAArch64CompareBuiltinExpr(
10299 Ops[0], ConvertType(E->getCallReturnType(getContext())),
10300 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
10301 case NEON::BI__builtin_neon_vcgezd_s64:
10302 case NEON::BI__builtin_neon_vcgezd_f64:
10303 case NEON::BI__builtin_neon_vcgezs_f32:
10304 case NEON::BI__builtin_neon_vcgezh_f16:
10305 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10306 return EmitAArch64CompareBuiltinExpr(
10307 Ops[0], ConvertType(E->getCallReturnType(getContext())),
10308 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
10309 case NEON::BI__builtin_neon_vclezd_s64:
10310 case NEON::BI__builtin_neon_vclezd_f64:
10311 case NEON::BI__builtin_neon_vclezs_f32:
10312 case NEON::BI__builtin_neon_vclezh_f16:
10313 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10314 return EmitAArch64CompareBuiltinExpr(
10315 Ops[0], ConvertType(E->getCallReturnType(getContext())),
10316 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
10317 case NEON::BI__builtin_neon_vcgtzd_s64:
10318 case NEON::BI__builtin_neon_vcgtzd_f64:
10319 case NEON::BI__builtin_neon_vcgtzs_f32:
10320 case NEON::BI__builtin_neon_vcgtzh_f16:
10321 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10322 return EmitAArch64CompareBuiltinExpr(
10323 Ops[0], ConvertType(E->getCallReturnType(getContext())),
10324 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
10325 case NEON::BI__builtin_neon_vcltzd_s64:
10326 case NEON::BI__builtin_neon_vcltzd_f64:
10327 case NEON::BI__builtin_neon_vcltzs_f32:
10328 case NEON::BI__builtin_neon_vcltzh_f16:
10329 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10330 return EmitAArch64CompareBuiltinExpr(
10331 Ops[0], ConvertType(E->getCallReturnType(getContext())),
10332 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
10333
10334 case NEON::BI__builtin_neon_vceqzd_u64: {
10335 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10336 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
10337 Ops[0] =
10338 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
10339 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
10340 }
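// i.e. the scalar compare-against-zero returns an all-ones or all-zero i64
// mask, produced by sign-extending the i1 result of the icmp.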
10341 case NEON::BI__builtin_neon_vceqd_f64:
10342 case NEON::BI__builtin_neon_vcled_f64:
10343 case NEON::BI__builtin_neon_vcltd_f64:
10344 case NEON::BI__builtin_neon_vcged_f64:
10345 case NEON::BI__builtin_neon_vcgtd_f64: {
10346 llvm::CmpInst::Predicate P;
10347 switch (BuiltinID) {
10348 default: llvm_unreachable("missing builtin ID in switch!");
10349 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
10350 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
10351 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
10352 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
10353 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
10354 }
10355 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10356 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
10357 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
10358 if (P == llvm::FCmpInst::FCMP_OEQ)
10359 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
10360 else
10361 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
10362 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
10363 }
10364 case NEON::BI__builtin_neon_vceqs_f32:
10365 case NEON::BI__builtin_neon_vcles_f32:
10366 case NEON::BI__builtin_neon_vclts_f32:
10367 case NEON::BI__builtin_neon_vcges_f32:
10368 case NEON::BI__builtin_neon_vcgts_f32: {
10369 llvm::CmpInst::Predicate P;
10370 switch (BuiltinID) {
10371 default: llvm_unreachable("missing builtin ID in switch!");
10372 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
10373 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
10374 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
10375 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
10376 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
10377 }
10378 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10379 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
10380 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
10381 if (P == llvm::FCmpInst::FCMP_OEQ)
10382 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
10383 else
10384 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
10385 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
10386 }
10387 case NEON::BI__builtin_neon_vceqh_f16:
10388 case NEON::BI__builtin_neon_vcleh_f16:
10389 case NEON::BI__builtin_neon_vclth_f16:
10390 case NEON::BI__builtin_neon_vcgeh_f16:
10391 case NEON::BI__builtin_neon_vcgth_f16: {
10392 llvm::CmpInst::Predicate P;
10393 switch (BuiltinID) {
10394 default: llvm_unreachable("missing builtin ID in switch!");
10395 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
10396 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
10397 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
10398 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
10399 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
10400 }
10401 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10402 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
10403 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
10404 if (P == llvm::FCmpInst::FCMP_OEQ)
10405 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
10406 else
10407 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
10408 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
10409 }
10410 case NEON::BI__builtin_neon_vceqd_s64:
10411 case NEON::BI__builtin_neon_vceqd_u64:
10412 case NEON::BI__builtin_neon_vcgtd_s64:
10413 case NEON::BI__builtin_neon_vcgtd_u64:
10414 case NEON::BI__builtin_neon_vcltd_s64:
10415 case NEON::BI__builtin_neon_vcltd_u64:
10416 case NEON::BI__builtin_neon_vcged_u64:
10417 case NEON::BI__builtin_neon_vcged_s64:
10418 case NEON::BI__builtin_neon_vcled_u64:
10419 case NEON::BI__builtin_neon_vcled_s64: {
10420 llvm::CmpInst::Predicate P;
10421 switch (BuiltinID) {
10422 default: llvm_unreachable("missing builtin ID in switch!");
10423 case NEON::BI__builtin_neon_vceqd_s64:
10424 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
10425 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
10426 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
10427 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
10428 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
10429 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
10430 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
10431 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
10432 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
10433 }
10434 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10435 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
10436 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
10437 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
10438 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
10439 }
10440 case NEON::BI__builtin_neon_vtstd_s64:
10441 case NEON::BI__builtin_neon_vtstd_u64: {
10442 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10443 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
10444 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
10445 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
10446 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
10447 llvm::Constant::getNullValue(Int64Ty));
10448 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
10449 }
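// i.e. the scalar "test bits" operation, roughly:
//   vtstd_u64(a, b) == ((a & b) != 0 ? ~0ULL : 0ULL)
// with the i1 compare result sign-extended to 64 bits.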
10450 case NEON::BI__builtin_neon_vset_lane_i8:
10451 case NEON::BI__builtin_neon_vset_lane_i16:
10452 case NEON::BI__builtin_neon_vset_lane_i32:
10453 case NEON::BI__builtin_neon_vset_lane_i64:
10454 case NEON::BI__builtin_neon_vset_lane_bf16:
10455 case NEON::BI__builtin_neon_vset_lane_f32:
10456 case NEON::BI__builtin_neon_vsetq_lane_i8:
10457 case NEON::BI__builtin_neon_vsetq_lane_i16:
10458 case NEON::BI__builtin_neon_vsetq_lane_i32:
10459 case NEON::BI__builtin_neon_vsetq_lane_i64:
10460 case NEON::BI__builtin_neon_vsetq_lane_bf16:
10461 case NEON::BI__builtin_neon_vsetq_lane_f32:
10462 Ops.push_back(EmitScalarExpr(E->getArg(2)));
10463 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
10464 case NEON::BI__builtin_neon_vset_lane_f64:
10465 // The vector type needs a cast for the v1f64 variant.
10466 Ops[1] =
10467 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
10468 Ops.push_back(EmitScalarExpr(E->getArg(2)));
10469 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
10470 case NEON::BI__builtin_neon_vsetq_lane_f64:
10471 // The vector type needs a cast for the v2f64 variant.
10472 Ops[1] =
10473 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
10474 Ops.push_back(EmitScalarExpr(E->getArg(2)));
10475 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
10476
10477 case NEON::BI__builtin_neon_vget_lane_i8:
10478 case NEON::BI__builtin_neon_vdupb_lane_i8:
10479 Ops[0] =
10480 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
10481 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10482 "vget_lane");
10483 case NEON::BI__builtin_neon_vgetq_lane_i8:
10484 case NEON::BI__builtin_neon_vdupb_laneq_i8:
10485 Ops[0] =
10486 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
10487 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10488 "vgetq_lane");
10489 case NEON::BI__builtin_neon_vget_lane_i16:
10490 case NEON::BI__builtin_neon_vduph_lane_i16:
10491 Ops[0] =
10492 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
10493 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10494 "vget_lane");
10495 case NEON::BI__builtin_neon_vgetq_lane_i16:
10496 case NEON::BI__builtin_neon_vduph_laneq_i16:
10497 Ops[0] =
10498 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
10499 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10500 "vgetq_lane");
10501 case NEON::BI__builtin_neon_vget_lane_i32:
10502 case NEON::BI__builtin_neon_vdups_lane_i32:
10503 Ops[0] =
10504 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
10505 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10506 "vget_lane");
10507 case NEON::BI__builtin_neon_vdups_lane_f32:
10508 Ops[0] =
10509 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
10510 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10511 "vdups_lane");
10512 case NEON::BI__builtin_neon_vgetq_lane_i32:
10513 case NEON::BI__builtin_neon_vdups_laneq_i32:
10514 Ops[0] =
10515 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
10516 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10517 "vgetq_lane");
10518 case NEON::BI__builtin_neon_vget_lane_i64:
10519 case NEON::BI__builtin_neon_vdupd_lane_i64:
10520 Ops[0] =
10521 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
10522 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10523 "vget_lane");
10524 case NEON::BI__builtin_neon_vdupd_lane_f64:
10525 Ops[0] =
10526 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
10527 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10528 "vdupd_lane");
10529 case NEON::BI__builtin_neon_vgetq_lane_i64:
10530 case NEON::BI__builtin_neon_vdupd_laneq_i64:
10531 Ops[0] =
10532 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
10533 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10534 "vgetq_lane");
10535 case NEON::BI__builtin_neon_vget_lane_f32:
10536 Ops[0] =
10537 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
10538 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10539 "vget_lane");
10540 case NEON::BI__builtin_neon_vget_lane_f64:
10541 Ops[0] =
10542 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
10543 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10544 "vget_lane");
10545 case NEON::BI__builtin_neon_vgetq_lane_f32:
10546 case NEON::BI__builtin_neon_vdups_laneq_f32:
10547 Ops[0] =
10548 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
10549 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10550 "vgetq_lane");
10551 case NEON::BI__builtin_neon_vgetq_lane_f64:
10552 case NEON::BI__builtin_neon_vdupd_laneq_f64:
10553 Ops[0] =
10554 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
10555 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10556 "vgetq_lane");
10557 case NEON::BI__builtin_neon_vaddh_f16:
10558 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10559 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
10560 case NEON::BI__builtin_neon_vsubh_f16:
10561 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10562 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
10563 case NEON::BI__builtin_neon_vmulh_f16:
10564 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10565 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
10566 case NEON::BI__builtin_neon_vdivh_f16:
10567 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10568 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
10569 case NEON::BI__builtin_neon_vfmah_f16:
10570 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
10571 return emitCallMaybeConstrainedFPBuiltin(
10572 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
10573 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
10574 case NEON::BI__builtin_neon_vfmsh_f16: {
10575 // FIXME: This should be an fneg instruction:
10576 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
10577 Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
10578
10579 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
10580 return emitCallMaybeConstrainedFPBuiltin(
10581 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
10582 {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
10583 }
10584 case NEON::BI__builtin_neon_vaddd_s64:
10585 case NEON::BI__builtin_neon_vaddd_u64:
10586 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
10587 case NEON::BI__builtin_neon_vsubd_s64:
10588 case NEON::BI__builtin_neon_vsubd_u64:
10589 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
10590 case NEON::BI__builtin_neon_vqdmlalh_s16:
10591 case NEON::BI__builtin_neon_vqdmlslh_s16: {
10592 SmallVector<Value *, 2> ProductOps;
10593 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
10594 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
10595 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
10596 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
10597 ProductOps, "vqdmlXl");
10598 Constant *CI = ConstantInt::get(SizeTy, 0);
10599 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
10600
10601 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
10602 ? Intrinsic::aarch64_neon_sqadd
10603 : Intrinsic::aarch64_neon_sqsub;
10604 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
10605 }
10606 case NEON::BI__builtin_neon_vqshlud_n_s64: {
10607 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10608 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
10609 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
10610 Ops, "vqshlu_n");
10611 }
10612 case NEON::BI__builtin_neon_vqshld_n_u64:
10613 case NEON::BI__builtin_neon_vqshld_n_s64: {
10614 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
10615 ? Intrinsic::aarch64_neon_uqshl
10616 : Intrinsic::aarch64_neon_sqshl;
10617 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10618 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
10619 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
10620 }
10621 case NEON::BI__builtin_neon_vrshrd_n_u64:
10622 case NEON::BI__builtin_neon_vrshrd_n_s64: {
10623 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
10624 ? Intrinsic::aarch64_neon_urshl
10625 : Intrinsic::aarch64_neon_srshl;
10626 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10627 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
10628 Ops[1] = ConstantInt::get(Int64Ty, -SV);
10629 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
10630 }
10631 case NEON::BI__builtin_neon_vrsrad_n_u64:
10632 case NEON::BI__builtin_neon_vrsrad_n_s64: {
10633 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
10634 ? Intrinsic::aarch64_neon_urshl
10635 : Intrinsic::aarch64_neon_srshl;
10636 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
10637 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
10638 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
10639 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
10640 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
10641 }
10642 case NEON::BI__builtin_neon_vshld_n_s64:
10643 case NEON::BI__builtin_neon_vshld_n_u64: {
10644 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
10645 return Builder.CreateShl(
10646 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
10647 }
10648 case NEON::BI__builtin_neon_vshrd_n_s64: {
10649 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
10650 return Builder.CreateAShr(
10651 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
10652 Amt->getZExtValue())),
10653 "shrd_n");
10654 }
10655 case NEON::BI__builtin_neon_vshrd_n_u64: {
10656 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
10657 uint64_t ShiftAmt = Amt->getZExtValue();
10658 // Right-shifting an unsigned value by its size yields 0.
10659 if (ShiftAmt == 64)
10660 return ConstantInt::get(Int64Ty, 0);
10661 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
10662 "shrd_n");
10663 }
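// e.g. vshrd_n_u64(x, 64) folds to the constant 0, since an lshr by the
// full bit width would be poison in IR; smaller amounts emit a plain lshr.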
10664 case NEON::BI__builtin_neon_vsrad_n_s64: {
10665 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
10666 Ops[1] = Builder.CreateAShr(
10667 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
10668 Amt->getZExtValue())),
10669 "shrd_n");
10670 return Builder.CreateAdd(Ops[0], Ops[1]);
10671 }
10672 case NEON::BI__builtin_neon_vsrad_n_u64: {
10673 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
10674 uint64_t ShiftAmt = Amt->getZExtValue();
10675 // Right-shifting an unsigned value by its size yields 0.
10676 // As Op + 0 = Op, return Ops[0] directly.
10677 if (ShiftAmt == 64)
10678 return Ops[0];
10679 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
10680 "shrd_n");
10681 return Builder.CreateAdd(Ops[0], Ops[1]);
10682 }
10683 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
10684 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
10685 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
10686 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
10687 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
10688 "lane");
10689 SmallVector<Value *, 2> ProductOps;
10690 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
10691 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
10692 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
10693 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
10694 ProductOps, "vqdmlXl");
10695 Constant *CI = ConstantInt::get(SizeTy, 0);
10696 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
10697 Ops.pop_back();
10698
10699 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
10700 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
10701 ? Intrinsic::aarch64_neon_sqadd
10702 : Intrinsic::aarch64_neon_sqsub;
10703 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
10704 }
10705 case NEON::BI__builtin_neon_vqdmlals_s32:
10706 case NEON::BI__builtin_neon_vqdmlsls_s32: {
10707 SmallVector<Value *, 2> ProductOps;
10708 ProductOps.push_back(Ops[1]);
10709 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
10710 Ops[1] =
10711 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
10712 ProductOps, "vqdmlXl");
10713
10714 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
10715 ? Intrinsic::aarch64_neon_sqadd
10716 : Intrinsic::aarch64_neon_sqsub;
10717 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
10718 }
10719 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
10720 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
10721 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
10722 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
10723 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
10724 "lane");
10725 SmallVector<Value *, 2> ProductOps;
10726 ProductOps.push_back(Ops[1]);
10727 ProductOps.push_back(Ops[2]);
10728 Ops[1] =
10729 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
10730 ProductOps, "vqdmlXl");
10731 Ops.pop_back();
10732
10733 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
10734 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
10735 ? Intrinsic::aarch64_neon_sqadd
10736 : Intrinsic::aarch64_neon_sqsub;
10737 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
10738 }
10739 case NEON::BI__builtin_neon_vget_lane_bf16:
10740 case NEON::BI__builtin_neon_vduph_lane_bf16:
10741 case NEON::BI__builtin_neon_vduph_lane_f16: {
10742 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10743 "vget_lane");
10744 }
10745 case NEON::BI__builtin_neon_vgetq_lane_bf16:
10746 case NEON::BI__builtin_neon_vduph_laneq_bf16:
10747 case NEON::BI__builtin_neon_vduph_laneq_f16: {
10748 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
10749 "vgetq_lane");
10750 }
10751
10752 case clang::AArch64::BI_InterlockedAdd: {
10753 Value *Arg0 = EmitScalarExpr(E->getArg(0));
10754 Value *Arg1 = EmitScalarExpr(E->getArg(1));
10755 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
10756 AtomicRMWInst::Add, Arg0, Arg1,
10757 llvm::AtomicOrdering::SequentiallyConsistent);
10758 return Builder.CreateAdd(RMWI, Arg1);
10759 }
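// atomicrmw add returns the value memory held *before* the update, so the
// extra CreateAdd yields the post-add value that _InterlockedAdd is defined
// to return.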
10760 }
10761
10762 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
10763 llvm::Type *Ty = VTy;
10764 if (!Ty)
10765 return nullptr;
10766
10767 // Not all intrinsics handled by the common case work for AArch64 yet, so only
10768 // defer to common code if the builtin has been added to our special map.
10769 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
10770 AArch64SIMDIntrinsicsProvenSorted);
10771
10772 if (Builtin)
10773 return EmitCommonNeonBuiltinExpr(
10774 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
10775 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
10776 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
10777
10778 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
10779 return V;
10780
10781 unsigned Int;
10782 switch (BuiltinID) {
10783 default: return nullptr;
10784 case NEON::BI__builtin_neon_vbsl_v:
10785 case NEON::BI__builtin_neon_vbslq_v: {
10786 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
10787 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
10788 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
10789 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
10790
10791 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
10792 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
10793 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
10794 return Builder.CreateBitCast(Ops[0], Ty);
10795 }
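// This is the classic bitwise-select pattern: for each bit,
//   result = (sel & a) | (~sel & b)
// with Ops[0] as the selector, matching what the BSL instruction computes.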
10796 case NEON::BI__builtin_neon_vfma_lane_v:
10797 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
10798 // The ARM builtins (and instructions) have the addend as the first
10799 // operand, but the 'fma' intrinsics have it last. Swap it around here.
10800 Value *Addend = Ops[0];
10801 Value *Multiplicand = Ops[1];
10802 Value *LaneSource = Ops[2];
10803 Ops[0] = Multiplicand;
10804 Ops[1] = LaneSource;
10805 Ops[2] = Addend;
10806
10807 // Now adjust things to handle the lane access.
10808 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
10809 ? llvm::FixedVectorType::get(VTy->getElementType(),
10810 VTy->getNumElements() / 2)
10811 : VTy;
10812 llvm::Constant *cst = cast<Constant>(Ops[3]);
10813 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
10814 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
10815 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
10816
10817 Ops.pop_back();
10818 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
10819 : Intrinsic::fma;
10820 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
10821 }
10822 case NEON::BI__builtin_neon_vfma_laneq_v: {
10823 auto *VTy = cast<llvm::FixedVectorType>(Ty);
10824 // v1f64 fma should be mapped to Neon scalar f64 fma
10825 if (VTy && VTy->getElementType() == DoubleTy) {
10826 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
10827 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
10828 llvm::FixedVectorType *VTy =
10829 llvm::FixedVectorType::get(DoubleTy, 1);
10830 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
10831 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
10832 Value *Result;
10833 Result = emitCallMaybeConstrainedFPBuiltin(
10834 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
10835 DoubleTy, {Ops[1], Ops[2], Ops[0]});
10836 return Builder.CreateBitCast(Result, Ty);
10837 }
10838 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
10839 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
10840
10841 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
10842 VTy->getNumElements() * 2);
10843 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
10844 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
10845 cast<ConstantInt>(Ops[3]));
10846 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
10847
10848 return emitCallMaybeConstrainedFPBuiltin(
10849 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
10850 {Ops[2], Ops[1], Ops[0]});
10851 }
10852 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
10853 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
10854 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
10855
10856 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
10857 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
10858 return emitCallMaybeConstrainedFPBuiltin(
10859 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
10860 {Ops[2], Ops[1], Ops[0]});
10861 }
10862 case NEON::BI__builtin_neon_vfmah_lane_f16:
10863 case NEON::BI__builtin_neon_vfmas_lane_f32:
10864 case NEON::BI__builtin_neon_vfmah_laneq_f16:
10865 case NEON::BI__builtin_neon_vfmas_laneq_f32:
10866 case NEON::BI__builtin_neon_vfmad_lane_f64:
10867 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
10868 Ops.push_back(EmitScalarExpr(E->getArg(3)));
10869 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
10870 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
10871 return emitCallMaybeConstrainedFPBuiltin(
10872 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
10873 {Ops[1], Ops[2], Ops[0]});
10874 }
10875 case NEON::BI__builtin_neon_vmull_v:
10876 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
10877 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
10878 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
10879 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
10880 case NEON::BI__builtin_neon_vmax_v:
10881 case NEON::BI__builtin_neon_vmaxq_v:
10882 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
10883 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
10884 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
10885 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
10886 case NEON::BI__builtin_neon_vmaxh_f16: {
10887 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10888 Int = Intrinsic::aarch64_neon_fmax;
10889 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
10890 }
10891 case NEON::BI__builtin_neon_vmin_v:
10892 case NEON::BI__builtin_neon_vminq_v:
10893 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
10894 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
10895 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
10896 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
10897 case NEON::BI__builtin_neon_vminh_f16: {
10898 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10899 Int = Intrinsic::aarch64_neon_fmin;
10900 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
10901 }
10902 case NEON::BI__builtin_neon_vabd_v:
10903 case NEON::BI__builtin_neon_vabdq_v:
10904 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
10905 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
10906 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
10907 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
10908 case NEON::BI__builtin_neon_vpadal_v:
10909 case NEON::BI__builtin_neon_vpadalq_v: {
10910 unsigned ArgElts = VTy->getNumElements();
10911 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
10912 unsigned BitWidth = EltTy->getBitWidth();
10913 auto *ArgTy = llvm::FixedVectorType::get(
10914 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
10915 llvm::Type* Tys[2] = { VTy, ArgTy };
10916 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
10917 SmallVector<llvm::Value*, 1> TmpOps;
10918 TmpOps.push_back(Ops[1]);
10919 Function *F = CGM.getIntrinsic(Int, Tys);
10920 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
10921 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
10922 return Builder.CreateAdd(tmp, addend);
10923 }
10924 case NEON::BI__builtin_neon_vpmin_v:
10925 case NEON::BI__builtin_neon_vpminq_v:
10926 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
10927 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
10928 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
10929 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
10930 case NEON::BI__builtin_neon_vpmax_v:
10931 case NEON::BI__builtin_neon_vpmaxq_v:
10932 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
10933 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
10934 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
10935 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
10936 case NEON::BI__builtin_neon_vminnm_v:
10937 case NEON::BI__builtin_neon_vminnmq_v:
10938 Int = Intrinsic::aarch64_neon_fminnm;
10939 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
10940 case NEON::BI__builtin_neon_vminnmh_f16:
10941 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10942 Int = Intrinsic::aarch64_neon_fminnm;
10943 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
10944 case NEON::BI__builtin_neon_vmaxnm_v:
10945 case NEON::BI__builtin_neon_vmaxnmq_v:
10946 Int = Intrinsic::aarch64_neon_fmaxnm;
10947 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
10948 case NEON::BI__builtin_neon_vmaxnmh_f16:
10949 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10950 Int = Intrinsic::aarch64_neon_fmaxnm;
10951 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
10952 case NEON::BI__builtin_neon_vrecpss_f32: {
10953 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10954 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
10955 Ops, "vrecps");
10956 }
10957 case NEON::BI__builtin_neon_vrecpsd_f64:
10958 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10959 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
10960 Ops, "vrecps");
10961 case NEON::BI__builtin_neon_vrecpsh_f16:
10962 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10963 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
10964 Ops, "vrecps");
10965 case NEON::BI__builtin_neon_vqshrun_n_v:
10966 Int = Intrinsic::aarch64_neon_sqshrun;
10967 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
10968 case NEON::BI__builtin_neon_vqrshrun_n_v:
10969 Int = Intrinsic::aarch64_neon_sqrshrun;
10970 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
10971 case NEON::BI__builtin_neon_vqshrn_n_v:
10972 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
10973 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
10974 case NEON::BI__builtin_neon_vrshrn_n_v:
10975 Int = Intrinsic::aarch64_neon_rshrn;
10976 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
10977 case NEON::BI__builtin_neon_vqrshrn_n_v:
10978 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
10979 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
10980 case NEON::BI__builtin_neon_vrndah_f16: {
10981 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10982 Int = Builder.getIsFPConstrained()
10983 ? Intrinsic::experimental_constrained_round
10984 : Intrinsic::round;
10985 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
10986 }
10987 case NEON::BI__builtin_neon_vrnda_v:
10988 case NEON::BI__builtin_neon_vrndaq_v: {
10989 Int = Builder.getIsFPConstrained()
10990 ? Intrinsic::experimental_constrained_round
10991 : Intrinsic::round;
10992 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
10993 }
10994 case NEON::BI__builtin_neon_vrndih_f16: {
10995 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10996 Int = Builder.getIsFPConstrained()
10997 ? Intrinsic::experimental_constrained_nearbyint
10998 : Intrinsic::nearbyint;
10999 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
11000 }
11001 case NEON::BI__builtin_neon_vrndmh_f16: {
11002 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11003 Int = Builder.getIsFPConstrained()
11004 ? Intrinsic::experimental_constrained_floor
11005 : Intrinsic::floor;
11006 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
11007 }
11008 case NEON::BI__builtin_neon_vrndm_v:
11009 case NEON::BI__builtin_neon_vrndmq_v: {
11010 Int = Builder.getIsFPConstrained()
11011 ? Intrinsic::experimental_constrained_floor
11012 : Intrinsic::floor;
11013 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
11014 }
11015 case NEON::BI__builtin_neon_vrndnh_f16: {
11016 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11017 Int = Builder.getIsFPConstrained()
11018 ? Intrinsic::experimental_constrained_roundeven
11019 : Intrinsic::roundeven;
11020 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
11021 }
11022 case NEON::BI__builtin_neon_vrndn_v:
11023 case NEON::BI__builtin_neon_vrndnq_v: {
11024 Int = Builder.getIsFPConstrained()
11025 ? Intrinsic::experimental_constrained_roundeven
11026 : Intrinsic::roundeven;
11027 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
11028 }
11029 case NEON::BI__builtin_neon_vrndns_f32: {
11030 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11031 Int = Builder.getIsFPConstrained()
11032 ? Intrinsic::experimental_constrained_roundeven
11033 : Intrinsic::roundeven;
11034 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
11035 }
11036 case NEON::BI__builtin_neon_vrndph_f16: {
11037 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11038 Int = Builder.getIsFPConstrained()
11039 ? Intrinsic::experimental_constrained_ceil
11040 : Intrinsic::ceil;
11041 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
11042 }
11043 case NEON::BI__builtin_neon_vrndp_v:
11044 case NEON::BI__builtin_neon_vrndpq_v: {
11045 Int = Builder.getIsFPConstrained()
11046 ? Intrinsic::experimental_constrained_ceil
11047 : Intrinsic::ceil;
11048 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
11049 }
11050 case NEON::BI__builtin_neon_vrndxh_f16: {
11051 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11052 Int = Builder.getIsFPConstrained()
11053 ? Intrinsic::experimental_constrained_rint
11054 : Intrinsic::rint;
11055 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
11056 }
11057 case NEON::BI__builtin_neon_vrndx_v:
11058 case NEON::BI__builtin_neon_vrndxq_v: {
11059 Int = Builder.getIsFPConstrained()
11060 ? Intrinsic::experimental_constrained_rint
11061 : Intrinsic::rint;
11062 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
11063 }
11064 case NEON::BI__builtin_neon_vrndh_f16: {
11065 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11066 Int = Builder.getIsFPConstrained()
11067 ? Intrinsic::experimental_constrained_trunc
11068 : Intrinsic::trunc;
11069 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
11070 }
11071 case NEON::BI__builtin_neon_vrnd32x_v:
11072 case NEON::BI__builtin_neon_vrnd32xq_v: {
11073 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11074 Int = Intrinsic::aarch64_neon_frint32x;
11075 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
11076 }
11077 case NEON::BI__builtin_neon_vrnd32z_v:
11078 case NEON::BI__builtin_neon_vrnd32zq_v: {
11079 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11080 Int = Intrinsic::aarch64_neon_frint32z;
11081 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
11082 }
11083 case NEON::BI__builtin_neon_vrnd64x_v:
11084 case NEON::BI__builtin_neon_vrnd64xq_v: {
11085 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11086 Int = Intrinsic::aarch64_neon_frint64x;
11087 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
11088 }
11089 case NEON::BI__builtin_neon_vrnd64z_v:
11090 case NEON::BI__builtin_neon_vrnd64zq_v: {
11091 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11092 Int = Intrinsic::aarch64_neon_frint64z;
11093 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
11094 }
11095 case NEON::BI__builtin_neon_vrnd_v:
11096 case NEON::BI__builtin_neon_vrndq_v: {
11097 Int = Builder.getIsFPConstrained()
11098 ? Intrinsic::experimental_constrained_trunc
11099 : Intrinsic::trunc;
11100 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
11101 }
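// Illustrative sketch, not part of the original file: every vrnd* case above
// repeats the same two-way choice between a plain LLVM rounding intrinsic and
// its strict-FP "constrained" counterpart. A hypothetical helper capturing
// that pattern:
static llvm::Intrinsic::ID selectRoundingIntrinsic(bool IsFPConstrained,
                                                   llvm::Intrinsic::ID Plain,
                                                   llvm::Intrinsic::ID Constrained) {
  // In a strict FP environment the constrained form preserves rounding-mode
  // and exception semantics; otherwise the plain intrinsic suffices.
  return IsFPConstrained ? Constrained : Plain;
}
// e.g. vrndm pairs (floor, experimental_constrained_floor) and vrndx pairs
// (rint, experimental_constrained_rint).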
11102 case NEON::BI__builtin_neon_vcvt_f64_v:
11103 case NEON::BI__builtin_neon_vcvtq_f64_v:
11104 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11105 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
11106 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
11107 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
11108 case NEON::BI__builtin_neon_vcvt_f64_f32: {
11109 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
11110 "unexpected vcvt_f64_f32 builtin");
11111 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
11112 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
11113
11114 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
11115 }
11116 case NEON::BI__builtin_neon_vcvt_f32_f64: {
11117 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
11118 "unexpected vcvt_f32_f64 builtin");
11119 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
11120 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
11121
11122 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
11123 }
11124 case NEON::BI__builtin_neon_vcvt_s32_v:
11125 case NEON::BI__builtin_neon_vcvt_u32_v:
11126 case NEON::BI__builtin_neon_vcvt_s64_v:
11127 case NEON::BI__builtin_neon_vcvt_u64_v:
11128 case NEON::BI__builtin_neon_vcvt_s16_v:
11129 case NEON::BI__builtin_neon_vcvt_u16_v:
11130 case NEON::BI__builtin_neon_vcvtq_s32_v:
11131 case NEON::BI__builtin_neon_vcvtq_u32_v:
11132 case NEON::BI__builtin_neon_vcvtq_s64_v:
11133 case NEON::BI__builtin_neon_vcvtq_u64_v:
11134 case NEON::BI__builtin_neon_vcvtq_s16_v:
11135 case NEON::BI__builtin_neon_vcvtq_u16_v: {
11136 Int =
11137 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
11138 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
11139 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
11140 }
11141 case NEON::BI__builtin_neon_vcvta_s16_v:
11142 case NEON::BI__builtin_neon_vcvta_u16_v:
11143 case NEON::BI__builtin_neon_vcvta_s32_v:
11144 case NEON::BI__builtin_neon_vcvtaq_s16_v:
11145 case NEON::BI__builtin_neon_vcvtaq_s32_v:
11146 case NEON::BI__builtin_neon_vcvta_u32_v:
11147 case NEON::BI__builtin_neon_vcvtaq_u16_v:
11148 case NEON::BI__builtin_neon_vcvtaq_u32_v:
11149 case NEON::BI__builtin_neon_vcvta_s64_v:
11150 case NEON::BI__builtin_neon_vcvtaq_s64_v:
11151 case NEON::BI__builtin_neon_vcvta_u64_v:
11152 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
11153 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
11154 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
11155 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
11156 }
11157 case NEON::BI__builtin_neon_vcvtm_s16_v:
11158 case NEON::BI__builtin_neon_vcvtm_s32_v:
11159 case NEON::BI__builtin_neon_vcvtmq_s16_v:
11160 case NEON::BI__builtin_neon_vcvtmq_s32_v:
11161 case NEON::BI__builtin_neon_vcvtm_u16_v:
11162 case NEON::BI__builtin_neon_vcvtm_u32_v:
11163 case NEON::BI__builtin_neon_vcvtmq_u16_v:
11164 case NEON::BI__builtin_neon_vcvtmq_u32_v:
11165 case NEON::BI__builtin_neon_vcvtm_s64_v:
11166 case NEON::BI__builtin_neon_vcvtmq_s64_v:
11167 case NEON::BI__builtin_neon_vcvtm_u64_v:
11168 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
11169 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
11170 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
11171 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
11172 }
11173 case NEON::BI__builtin_neon_vcvtn_s16_v:
11174 case NEON::BI__builtin_neon_vcvtn_s32_v:
11175 case NEON::BI__builtin_neon_vcvtnq_s16_v:
11176 case NEON::BI__builtin_neon_vcvtnq_s32_v:
11177 case NEON::BI__builtin_neon_vcvtn_u16_v:
11178 case NEON::BI__builtin_neon_vcvtn_u32_v:
11179 case NEON::BI__builtin_neon_vcvtnq_u16_v:
11180 case NEON::BI__builtin_neon_vcvtnq_u32_v:
11181 case NEON::BI__builtin_neon_vcvtn_s64_v:
11182 case NEON::BI__builtin_neon_vcvtnq_s64_v:
11183 case NEON::BI__builtin_neon_vcvtn_u64_v:
11184 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
11185 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
11186 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
11187 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
11188 }
11189 case NEON::BI__builtin_neon_vcvtp_s16_v:
11190 case NEON::BI__builtin_neon_vcvtp_s32_v:
11191 case NEON::BI__builtin_neon_vcvtpq_s16_v:
11192 case NEON::BI__builtin_neon_vcvtpq_s32_v:
11193 case NEON::BI__builtin_neon_vcvtp_u16_v:
11194 case NEON::BI__builtin_neon_vcvtp_u32_v:
11195 case NEON::BI__builtin_neon_vcvtpq_u16_v:
11196 case NEON::BI__builtin_neon_vcvtpq_u32_v:
11197 case NEON::BI__builtin_neon_vcvtp_s64_v:
11198 case NEON::BI__builtin_neon_vcvtpq_s64_v:
11199 case NEON::BI__builtin_neon_vcvtp_u64_v:
11200 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
11201 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
11202 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
11203 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
11204 }
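// Illustrative note, not part of the original file: the vcvt{a,m,n,p} groups
// above differ only in the rounding applied before the float-to-int convert:
//   fcvtas/fcvtau -- to nearest, ties away from zero
//   fcvtms/fcvtmu -- toward minus infinity (floor)
//   fcvtns/fcvtnu -- to nearest, ties to even
//   fcvtps/fcvtpu -- toward plus infinity (ceil)
// For example, vcvtmq_s32_f32 lowers to roughly:
//   %r = call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> %x)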
11205 case NEON::BI__builtin_neon_vmulx_v:
11206 case NEON::BI__builtin_neon_vmulxq_v: {
11207 Int = Intrinsic::aarch64_neon_fmulx;
11208 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
11209 }
11210 case NEON::BI__builtin_neon_vmulxh_lane_f16:
11211 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
11212 // vmulx_lane should be mapped to Neon scalar mulx after
11213 // extracting the scalar element
11214 Ops.push_back(EmitScalarExpr(E->getArg(2)));
11215 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
11216 Ops.pop_back();
11217 Int = Intrinsic::aarch64_neon_fmulx;
11218 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
11219 }
11220 case NEON::BI__builtin_neon_vmul_lane_v:
11221 case NEON::BI__builtin_neon_vmul_laneq_v: {
11222 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
11223 bool Quad = false;
11224 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
11225 Quad = true;
11226 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
11227 llvm::FixedVectorType *VTy =
11229 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
11230 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
11231 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
11232 return Builder.CreateBitCast(Result, Ty);
11233 }
11234 case NEON::BI__builtin_neon_vnegd_s64:
11235 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
11236 case NEON::BI__builtin_neon_vnegh_f16:
11237 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
11238 case NEON::BI__builtin_neon_vpmaxnm_v:
11239 case NEON::BI__builtin_neon_vpmaxnmq_v: {
11240 Int = Intrinsic::aarch64_neon_fmaxnmp;
11241 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
11242 }
11243 case NEON::BI__builtin_neon_vpminnm_v:
11244 case NEON::BI__builtin_neon_vpminnmq_v: {
11245 Int = Intrinsic::aarch64_neon_fminnmp;
11246 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
11247 }
11248 case NEON::BI__builtin_neon_vsqrth_f16: {
11249 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11250 Int = Builder.getIsFPConstrained()
11251 ? Intrinsic::experimental_constrained_sqrt
11252 : Intrinsic::sqrt;
11253 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
11254 }
11255 case NEON::BI__builtin_neon_vsqrt_v:
11256 case NEON::BI__builtin_neon_vsqrtq_v: {
11257 Int = Builder.getIsFPConstrained()
11258 ? Intrinsic::experimental_constrained_sqrt
11259 : Intrinsic::sqrt;
11260 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11261 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
11262 }
11263 case NEON::BI__builtin_neon_vrbit_v:
11264 case NEON::BI__builtin_neon_vrbitq_v: {
11265 Int = Intrinsic::bitreverse;
11266 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
11267 }
11268 case NEON::BI__builtin_neon_vaddv_u8:
11269 // FIXME: These are handled by the AArch64 scalar code.
11270 usgn = true;
11271 LLVM_FALLTHROUGH;
11272 case NEON::BI__builtin_neon_vaddv_s8: {
11273 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
11274 Ty = Int32Ty;
11275 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
11276 llvm::Type *Tys[2] = { Ty, VTy };
11277 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11278 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
11279 return Builder.CreateTrunc(Ops[0], Int8Ty);
11280 }
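// Illustrative sketch, not part of the original file: the across-vector
// reductions here always call the intrinsic at i32 and truncate back to the
// element width, since the AArch64 ADDV instruction writes a full 32-bit
// register. For vaddv_u8 on <8 x i8> the emitted IR is approximately:
//   %sum = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %v)
//   %res = trunc i32 %sum to i8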
11281 case NEON::BI__builtin_neon_vaddv_u16:
11282 usgn = true;
11283 LLVM_FALLTHROUGH;
11284 case NEON::BI__builtin_neon_vaddv_s16: {
11285 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
11286 Ty = Int32Ty;
11287 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
11288 llvm::Type *Tys[2] = { Ty, VTy };
11289 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11290 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
11291 return Builder.CreateTrunc(Ops[0], Int16Ty);
11292 }
11293 case NEON::BI__builtin_neon_vaddvq_u8:
11294 usgn = true;
11295 LLVM_FALLTHROUGH;
11296 case NEON::BI__builtin_neon_vaddvq_s8: {
11297 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
11298 Ty = Int32Ty;
11299 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
11300 llvm::Type *Tys[2] = { Ty, VTy };
11301 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11302 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
11303 return Builder.CreateTrunc(Ops[0], Int8Ty);
11304 }
11305 case NEON::BI__builtin_neon_vaddvq_u16:
11306 usgn = true;
11307 LLVM_FALLTHROUGH;
11308 case NEON::BI__builtin_neon_vaddvq_s16: {
11309 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
11310 Ty = Int32Ty;
11311 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
11312 llvm::Type *Tys[2] = { Ty, VTy };
11313 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11314 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
11315 return Builder.CreateTrunc(Ops[0], Int16Ty);
11316 }
11317 case NEON::BI__builtin_neon_vmaxv_u8: {
11318 Int = Intrinsic::aarch64_neon_umaxv;
11319 Ty = Int32Ty;
11320 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
11321 llvm::Type *Tys[2] = { Ty, VTy };
11322 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11323 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11324 return Builder.CreateTrunc(Ops[0], Int8Ty);
11325 }
11326 case NEON::BI__builtin_neon_vmaxv_u16: {
11327 Int = Intrinsic::aarch64_neon_umaxv;
11328 Ty = Int32Ty;
11329 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
11330 llvm::Type *Tys[2] = { Ty, VTy };
11331 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11332 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11333 return Builder.CreateTrunc(Ops[0], Int16Ty);
11334 }
11335 case NEON::BI__builtin_neon_vmaxvq_u8: {
11336 Int = Intrinsic::aarch64_neon_umaxv;
11337 Ty = Int32Ty;
11338 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
11339 llvm::Type *Tys[2] = { Ty, VTy };
11340 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11341 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11342 return Builder.CreateTrunc(Ops[0], Int8Ty);
11343 }
11344 case NEON::BI__builtin_neon_vmaxvq_u16: {
11345 Int = Intrinsic::aarch64_neon_umaxv;
11346 Ty = Int32Ty;
11347 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
11348 llvm::Type *Tys[2] = { Ty, VTy };
11349 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11350 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11351 return Builder.CreateTrunc(Ops[0], Int16Ty);
11352 }
11353 case NEON::BI__builtin_neon_vmaxv_s8: {
11354 Int = Intrinsic::aarch64_neon_smaxv;
11355 Ty = Int32Ty;
11356 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
11357 llvm::Type *Tys[2] = { Ty, VTy };
11358 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11359 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11360 return Builder.CreateTrunc(Ops[0], Int8Ty);
11361 }
11362 case NEON::BI__builtin_neon_vmaxv_s16: {
11363 Int = Intrinsic::aarch64_neon_smaxv;
11364 Ty = Int32Ty;
11365 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
11366 llvm::Type *Tys[2] = { Ty, VTy };
11367 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11368 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11369 return Builder.CreateTrunc(Ops[0], Int16Ty);
11370 }
11371 case NEON::BI__builtin_neon_vmaxvq_s8: {
11372 Int = Intrinsic::aarch64_neon_smaxv;
11373 Ty = Int32Ty;
11374 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
11375 llvm::Type *Tys[2] = { Ty, VTy };
11376 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11377 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11378 return Builder.CreateTrunc(Ops[0], Int8Ty);
11379 }
11380 case NEON::BI__builtin_neon_vmaxvq_s16: {
11381 Int = Intrinsic::aarch64_neon_smaxv;
11382 Ty = Int32Ty;
11383 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
11384 llvm::Type *Tys[2] = { Ty, VTy };
11385 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11386 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11387 return Builder.CreateTrunc(Ops[0], Int16Ty);
11388 }
11389 case NEON::BI__builtin_neon_vmaxv_f16: {
11390 Int = Intrinsic::aarch64_neon_fmaxv;
11391 Ty = HalfTy;
11392 VTy = llvm::FixedVectorType::get(HalfTy, 4);
11393 llvm::Type *Tys[2] = { Ty, VTy };
11394 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11395 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11396 return Builder.CreateTrunc(Ops[0], HalfTy);
11397 }
11398 case NEON::BI__builtin_neon_vmaxvq_f16: {
11399 Int = Intrinsic::aarch64_neon_fmaxv;
11400 Ty = HalfTy;
11401 VTy = llvm::FixedVectorType::get(HalfTy, 8);
11402 llvm::Type *Tys[2] = { Ty, VTy };
11403 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11404 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11405 return Builder.CreateTrunc(Ops[0], HalfTy);
11406 }
11407 case NEON::BI__builtin_neon_vminv_u8: {
11408 Int = Intrinsic::aarch64_neon_uminv;
11409 Ty = Int32Ty;
11410 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
11411 llvm::Type *Tys[2] = { Ty, VTy };
11412 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11413 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
11414 return Builder.CreateTrunc(Ops[0], Int8Ty);
11415 }
11416 case NEON::BI__builtin_neon_vminv_u16: {
11417 Int = Intrinsic::aarch64_neon_uminv;
11418 Ty = Int32Ty;
11419 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
11420 llvm::Type *Tys[2] = { Ty, VTy };
11421 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11422 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
11423 return Builder.CreateTrunc(Ops[0], Int16Ty);
11424 }
11425 case NEON::BI__builtin_neon_vminvq_u8: {
11426 Int = Intrinsic::aarch64_neon_uminv;
11427 Ty = Int32Ty;
11428 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
11429 llvm::Type *Tys[2] = { Ty, VTy };
11430 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11431 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
11432 return Builder.CreateTrunc(Ops[0], Int8Ty);
11433 }
11434 case NEON::BI__builtin_neon_vminvq_u16: {
11435 Int = Intrinsic::aarch64_neon_uminv;
11436 Ty = Int32Ty;
11437 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
11438 llvm::Type *Tys[2] = { Ty, VTy };
11439 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11440 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
11441 return Builder.CreateTrunc(Ops[0], Int16Ty);
11442 }
11443 case NEON::BI__builtin_neon_vminv_s8: {
11444 Int = Intrinsic::aarch64_neon_sminv;
11445 Ty = Int32Ty;
11446 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
11447 llvm::Type *Tys[2] = { Ty, VTy };
11448 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11449 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
11450 return Builder.CreateTrunc(Ops[0], Int8Ty);
11451 }
11452 case NEON::BI__builtin_neon_vminv_s16: {
11453 Int = Intrinsic::aarch64_neon_sminv;
11454 Ty = Int32Ty;
11455 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
11456 llvm::Type *Tys[2] = { Ty, VTy };
11457 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11458 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
11459 return Builder.CreateTrunc(Ops[0], Int16Ty);
11460 }
11461 case NEON::BI__builtin_neon_vminvq_s8: {
11462 Int = Intrinsic::aarch64_neon_sminv;
11463 Ty = Int32Ty;
11464 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
11465 llvm::Type *Tys[2] = { Ty, VTy };
11466 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11467 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
11468 return Builder.CreateTrunc(Ops[0], Int8Ty);
11469 }
11470 case NEON::BI__builtin_neon_vminvq_s16: {
11471 Int = Intrinsic::aarch64_neon_sminv;
11472 Ty = Int32Ty;
11473 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
11474 llvm::Type *Tys[2] = { Ty, VTy };
11475 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11476 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
11477 return Builder.CreateTrunc(Ops[0], Int16Ty);
11478 }
11479 case NEON::BI__builtin_neon_vminv_f16: {
11480 Int = Intrinsic::aarch64_neon_fminv;
11481 Ty = HalfTy;
11482 VTy = llvm::FixedVectorType::get(HalfTy, 4);
11483 llvm::Type *Tys[2] = { Ty, VTy };
11484 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11485 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
11486 return Builder.CreateTrunc(Ops[0], HalfTy);
11487 }
11488 case NEON::BI__builtin_neon_vminvq_f16: {
11489 Int = Intrinsic::aarch64_neon_fminv;
11490 Ty = HalfTy;
11491 VTy = llvm::FixedVectorType::get(HalfTy, 8);
11492 llvm::Type *Tys[2] = { Ty, VTy };
11493 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11494 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
11495 return Builder.CreateTrunc(Ops[0], HalfTy);
11496 }
11497 case NEON::BI__builtin_neon_vmaxnmv_f16: {
11498 Int = Intrinsic::aarch64_neon_fmaxnmv;
11499 Ty = HalfTy;
11500 VTy = llvm::FixedVectorType::get(HalfTy, 4);
11501 llvm::Type *Tys[2] = { Ty, VTy };
11502 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11503 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
11504 return Builder.CreateTrunc(Ops[0], HalfTy);
11505 }
11506 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
11507 Int = Intrinsic::aarch64_neon_fmaxnmv;
11508 Ty = HalfTy;
11509 VTy = llvm::FixedVectorType::get(HalfTy, 8);
11510 llvm::Type *Tys[2] = { Ty, VTy };
11511 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11512 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
11513 return Builder.CreateTrunc(Ops[0], HalfTy);
11514 }
11515 case NEON::BI__builtin_neon_vminnmv_f16: {
11516 Int = Intrinsic::aarch64_neon_fminnmv;
11517 Ty = HalfTy;
11518 VTy = llvm::FixedVectorType::get(HalfTy, 4);
11519 llvm::Type *Tys[2] = { Ty, VTy };
11520 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11521 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
11522 return Builder.CreateTrunc(Ops[0], HalfTy);
11523 }
11524 case NEON::BI__builtin_neon_vminnmvq_f16: {
11525 Int = Intrinsic::aarch64_neon_fminnmv;
11526 Ty = HalfTy;
11527 VTy = llvm::FixedVectorType::get(HalfTy, 8);
11528 llvm::Type *Tys[2] = { Ty, VTy };
11529 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11530 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
11531 return Builder.CreateTrunc(Ops[0], HalfTy);
11532 }
11533 case NEON::BI__builtin_neon_vmul_n_f64: {
11534 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
11535 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
11536 return Builder.CreateFMul(Ops[0], RHS);
11537 }
11538 case NEON::BI__builtin_neon_vaddlv_u8: {
11539 Int = Intrinsic::aarch64_neon_uaddlv;
11540 Ty = Int32Ty;
11541 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
11542 llvm::Type *Tys[2] = { Ty, VTy };
11543 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11544 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
11545 return Builder.CreateTrunc(Ops[0], Int16Ty);
11546 }
11547 case NEON::BI__builtin_neon_vaddlv_u16: {
11548 Int = Intrinsic::aarch64_neon_uaddlv;
11549 Ty = Int32Ty;
11550 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
11551 llvm::Type *Tys[2] = { Ty, VTy };
11552 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11553 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
11554 }
11555 case NEON::BI__builtin_neon_vaddlvq_u8: {
11556 Int = Intrinsic::aarch64_neon_uaddlv;
11557 Ty = Int32Ty;
11558 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
11559 llvm::Type *Tys[2] = { Ty, VTy };
11560 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11561 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
11562 return Builder.CreateTrunc(Ops[0], Int16Ty);
11563 }
11564 case NEON::BI__builtin_neon_vaddlvq_u16: {
11565 Int = Intrinsic::aarch64_neon_uaddlv;
11566 Ty = Int32Ty;
11567 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
11568 llvm::Type *Tys[2] = { Ty, VTy };
11569 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11570 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
11571 }
11572 case NEON::BI__builtin_neon_vaddlv_s8: {
11573 Int = Intrinsic::aarch64_neon_saddlv;
11574 Ty = Int32Ty;
11575 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
11576 llvm::Type *Tys[2] = { Ty, VTy };
11577 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11578 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
11579 return Builder.CreateTrunc(Ops[0], Int16Ty);
11580 }
11581 case NEON::BI__builtin_neon_vaddlv_s16: {
11582 Int = Intrinsic::aarch64_neon_saddlv;
11583 Ty = Int32Ty;
11584 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
11585 llvm::Type *Tys[2] = { Ty, VTy };
11586 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11587 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
11588 }
11589 case NEON::BI__builtin_neon_vaddlvq_s8: {
11590 Int = Intrinsic::aarch64_neon_saddlv;
11591 Ty = Int32Ty;
11592 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
11593 llvm::Type *Tys[2] = { Ty, VTy };
11594 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11595 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
11596 return Builder.CreateTrunc(Ops[0], Int16Ty);
11597 }
11598 case NEON::BI__builtin_neon_vaddlvq_s16: {
11599 Int = Intrinsic::aarch64_neon_saddlv;
11600 Ty = Int32Ty;
11601 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
11602 llvm::Type *Tys[2] = { Ty, VTy };
11603 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11604 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
11605 }
11606 case NEON::BI__builtin_neon_vsri_n_v:
11607 case NEON::BI__builtin_neon_vsriq_n_v: {
11608 Int = Intrinsic::aarch64_neon_vsri;
11609 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
11610 return EmitNeonCall(Intrin, Ops, "vsri_n");
11611 }
11612 case NEON::BI__builtin_neon_vsli_n_v:
11613 case NEON::BI__builtin_neon_vsliq_n_v: {
11614 Int = Intrinsic::aarch64_neon_vsli;
11615 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
11616 return EmitNeonCall(Intrin, Ops, "vsli_n");
11617 }
11618 case NEON::BI__builtin_neon_vsra_n_v:
11619 case NEON::BI__builtin_neon_vsraq_n_v:
11620 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11621 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
11622 return Builder.CreateAdd(Ops[0], Ops[1]);
11623 case NEON::BI__builtin_neon_vrsra_n_v:
11624 case NEON::BI__builtin_neon_vrsraq_n_v: {
11625 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
11626 SmallVector<llvm::Value*,2> TmpOps;
11627 TmpOps.push_back(Ops[1]);
11628 TmpOps.push_back(Ops[2]);
11629 Function* F = CGM.getIntrinsic(Int, Ty);
11630 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
11631 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
11632 return Builder.CreateAdd(Ops[0], tmp);
11633 }
11634 case NEON::BI__builtin_neon_vld1_v:
11635 case NEON::BI__builtin_neon_vld1q_v: {
11636 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
11637 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
11638 }
11639 case NEON::BI__builtin_neon_vst1_v:
11640 case NEON::BI__builtin_neon_vst1q_v:
11641 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
11642 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
11643 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
11644 case NEON::BI__builtin_neon_vld1_lane_v:
11645 case NEON::BI__builtin_neon_vld1q_lane_v: {
11646 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11647 Ty = llvm::PointerType::getUnqual(VTy->getElementType());
11648 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11649 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
11650 PtrOp0.getAlignment());
11651 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
11652 }
11653 case NEON::BI__builtin_neon_vld1_dup_v:
11654 case NEON::BI__builtin_neon_vld1q_dup_v: {
11655 Value *V = UndefValue::get(Ty);
11656 Ty = llvm::PointerType::getUnqual(VTy->getElementType());
11657 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11658 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
11659 PtrOp0.getAlignment());
11660 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
11661 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
11662 return EmitNeonSplat(Ops[0], CI);
11663 }
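// Illustrative sketch, not part of the original file: vld1_dup loads a single
// scalar and broadcasts it, so for a <4 x i32> result the sequence built
// above amounts to:
//   %s = load i32, i32* %p
//   %v = insertelement <4 x i32> undef, i32 %s, i32 0
//   %d = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer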
11664 case NEON::BI__builtin_neon_vst1_lane_v:
11665 case NEON::BI__builtin_neon_vst1q_lane_v:
11666 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11667 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
11668 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
11669 return Builder.CreateAlignedStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty),
11670 PtrOp0.getAlignment());
11671 case NEON::BI__builtin_neon_vld2_v:
11672 case NEON::BI__builtin_neon_vld2q_v: {
11673 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
11674 Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
11675 llvm::Type *Tys[2] = { VTy, PTy };
11676 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
11677 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
11678 Ops[0] = Builder.CreateBitCast(Ops[0],
11679 llvm::PointerType::getUnqual(Ops[1]->getType()));
11680 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
11681 }
11682 case NEON::BI__builtin_neon_vld3_v:
11683 case NEON::BI__builtin_neon_vld3q_v: {
11684 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
11685 Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
11686 llvm::Type *Tys[2] = { VTy, PTy };
11687 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
11688 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
11689 Ops[0] = Builder.CreateBitCast(Ops[0],
11690 llvm::PointerType::getUnqual(Ops[1]->getType()));
11691 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
11692 }
11693 case NEON::BI__builtin_neon_vld4_v:
11694 case NEON::BI__builtin_neon_vld4q_v: {
11695 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
11696 Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
11697 llvm::Type *Tys[2] = { VTy, PTy };
11698 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
11699 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
11700 Ops[0] = Builder.CreateBitCast(Ops[0],
11701 llvm::PointerType::getUnqual(Ops[1]->getType()));
11702 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
11703 }
11704 case NEON::BI__builtin_neon_vld2_dup_v:
11705 case NEON::BI__builtin_neon_vld2q_dup_v: {
11706 llvm::Type *PTy =
11707 llvm::PointerType::getUnqual(VTy->getElementType());
11708 Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
11709 llvm::Type *Tys[2] = { VTy, PTy };
11710 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
11711 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
11712 Ops[0] = Builder.CreateBitCast(Ops[0],
11713 llvm::PointerType::getUnqual(Ops[1]->getType()));
11714 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
11715 }
11716 case NEON::BI__builtin_neon_vld3_dup_v:
11717 case NEON::BI__builtin_neon_vld3q_dup_v: {
11718 llvm::Type *PTy =
11719 llvm::PointerType::getUnqual(VTy->getElementType());
11720 Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
11721 llvm::Type *Tys[2] = { VTy, PTy };
11722 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
11723 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
11724 Ops[0] = Builder.CreateBitCast(Ops[0],
11725 llvm::PointerType::getUnqual(Ops[1]->getType()));
11726 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
11727 }
11728 case NEON::BI__builtin_neon_vld4_dup_v:
11729 case NEON::BI__builtin_neon_vld4q_dup_v: {
11730 llvm::Type *PTy =
11731 llvm::PointerType::getUnqual(VTy->getElementType());
11732 Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
11733 llvm::Type *Tys[2] = { VTy, PTy };
11734 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
11735 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
11736 Ops[0] = Builder.CreateBitCast(Ops[0],
11737 llvm::PointerType::getUnqual(Ops[1]->getType()));
11738 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
11739 }
11740 case NEON::BI__builtin_neon_vld2_lane_v:
11741 case NEON::BI__builtin_neon_vld2q_lane_v: {
11742 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
11743 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
11744 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
11745 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11746 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
11747 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
11748 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
11749 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
11750 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11751 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
11752 }
11753 case NEON::BI__builtin_neon_vld3_lane_v:
11754 case NEON::BI__builtin_neon_vld3q_lane_v: {
11755 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
11756 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
11757 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
11758 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11759 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
11760 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
11761 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
11762 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
11763 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
11764 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11765 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
11766 }
11767 case NEON::BI__builtin_neon_vld4_lane_v:
11768 case NEON::BI__builtin_neon_vld4q_lane_v: {
11769 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
11770 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
11771 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
11772 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11773 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
11774 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
11775 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
11776 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
11777 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
11778 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
11779 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11780 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
11781 }
11782 case NEON::BI__builtin_neon_vst2_v:
11783 case NEON::BI__builtin_neon_vst2q_v: {
11784 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
11785 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
11786 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
11787 Ops, "");
11788 }
11789 case NEON::BI__builtin_neon_vst2_lane_v:
11790 case NEON::BI__builtin_neon_vst2q_lane_v: {
11791 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
11792 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
11793 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
11794 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
11795 Ops, "");
11796 }
11797 case NEON::BI__builtin_neon_vst3_v:
11798 case NEON::BI__builtin_neon_vst3q_v: {
11799 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
11800 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
11801 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
11802 Ops, "");
11803 }
11804 case NEON::BI__builtin_neon_vst3_lane_v:
11805 case NEON::BI__builtin_neon_vst3q_lane_v: {
11806 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
11807 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
11808 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
11809 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
11810 Ops, "");
11811 }
11812 case NEON::BI__builtin_neon_vst4_v:
11813 case NEON::BI__builtin_neon_vst4q_v: {
11814 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
11815 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
11816 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
11817 Ops, "");
11818 }
11819 case NEON::BI__builtin_neon_vst4_lane_v:
11820 case NEON::BI__builtin_neon_vst4q_lane_v: {
11821 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
11822 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
11823 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
11824 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
11825 Ops, "");
11826 }
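// Illustrative note, not part of the original file: in the vst2/vst3/vst4
// cases above, the builtin receives the destination pointer first, while the
// aarch64.neon.st* intrinsics expect the data vectors first and the pointer
// last; the std::rotate call performs exactly that operand reordering.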
11827 case NEON::BI__builtin_neon_vtrn_v:
11828 case NEON::BI__builtin_neon_vtrnq_v: {
11829 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
11830 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11831 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
11832 Value *SV = nullptr;
11833
11834 for (unsigned vi = 0; vi != 2; ++vi) {
11835 SmallVector<int, 16> Indices;
11836 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
11837 Indices.push_back(i+vi);
11838 Indices.push_back(i+e+vi);
11839 }
11840 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
11841 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
11842 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
11843 }
11844 return SV;
11845 }
11846 case NEON::BI__builtin_neon_vuzp_v:
11847 case NEON::BI__builtin_neon_vuzpq_v: {
11848 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
11849 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11850 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
11851 Value *SV = nullptr;
11852
11853 for (unsigned vi = 0; vi != 2; ++vi) {
11854 SmallVector<int, 16> Indices;
11855 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
11856 Indices.push_back(2*i+vi);
11857
11858 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
11859 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
11860 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
11861 }
11862 return SV;
11863 }
11864 case NEON::BI__builtin_neon_vzip_v:
11865 case NEON::BI__builtin_neon_vzipq_v: {
11866 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
11867 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11868 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
11869 Value *SV = nullptr;
11870
11871 for (unsigned vi = 0; vi != 2; ++vi) {
11872 SmallVector<int, 16> Indices;
11873 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
11874 Indices.push_back((i + vi*e) >> 1);
11875 Indices.push_back(((i + vi*e) >> 1)+e);
11876 }
11877 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
11878 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
11879 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
11880 }
11881 return SV;
11882 }
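// Worked example, illustrative and not part of the original file: for a
// 4-lane vzip over concatenated inputs {a0..a3, b0..b3}, the loop above
// yields two shuffles:
//   vi == 0 -> indices {0, 4, 1, 5} => {a0, b0, a1, b1}
//   vi == 1 -> indices {2, 6, 3, 7} => {a2, b2, a3, b3}
// vtrn and vuzp differ only in how the index sequences are generated.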
11883 case NEON::BI__builtin_neon_vqtbl1q_v: {
11884 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
11885 Ops, "vtbl1");
11886 }
11887 case NEON::BI__builtin_neon_vqtbl2q_v: {
11888 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
11889 Ops, "vtbl2");
11890 }
11891 case NEON::BI__builtin_neon_vqtbl3q_v: {
11892 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
11893 Ops, "vtbl3");
11894 }
11895 case NEON::BI__builtin_neon_vqtbl4q_v: {
11896 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
11897 Ops, "vtbl4");
11898 }
11899 case NEON::BI__builtin_neon_vqtbx1q_v: {
11900 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
11901 Ops, "vtbx1");
11902 }
11903 case NEON::BI__builtin_neon_vqtbx2q_v: {
11904 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
11905 Ops, "vtbx2");
11906 }
11907 case NEON::BI__builtin_neon_vqtbx3q_v: {
11908 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
11909 Ops, "vtbx3");
11910 }
11911 case NEON::BI__builtin_neon_vqtbx4q_v: {
11912 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
11913 Ops, "vtbx4");
11914 }
11915 case NEON::BI__builtin_neon_vsqadd_v:
11916 case NEON::BI__builtin_neon_vsqaddq_v: {
11917 Int = Intrinsic::aarch64_neon_usqadd;
11918 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
11919 }
11920 case NEON::BI__builtin_neon_vuqadd_v:
11921 case NEON::BI__builtin_neon_vuqaddq_v: {
11922 Int = Intrinsic::aarch64_neon_suqadd;
11923 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
11924 }
11925 }
11926}
11927
11928Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
11929 const CallExpr *E) {
11930 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
11931 BuiltinID == BPF::BI__builtin_btf_type_id ||
11932 BuiltinID == BPF::BI__builtin_preserve_type_info ||
11933 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
11934 "unexpected BPF builtin");
11935
11936 // A sequence number, injected into IR builtin functions, to
11937 // prevent CSE when the only difference between the functions
11938 // may be the debuginfo metadata.
11939 static uint32_t BuiltinSeqNum;
11940
11941 switch (BuiltinID) {
11942 default:
11943 llvm_unreachable("Unexpected BPF builtin");
11944 case BPF::BI__builtin_preserve_field_info: {
11945 const Expr *Arg = E->getArg(0);
11946 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
11947
11948 if (!getDebugInfo()) {
11949 CGM.Error(E->getExprLoc(),
11950 "using __builtin_preserve_field_info() without -g");
11951 return IsBitField ? EmitLValue(Arg).getBitFieldPointer()
11952 : EmitLValue(Arg).getPointer(*this);
11953 }
11954
11955 // Enable underlying preserve_*_access_index() generation.
11956 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
11957 IsInPreservedAIRegion = true;
11958 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer()
11959 : EmitLValue(Arg).getPointer(*this);
11960 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
11961
11962 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
11963 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
11964
11965 // Build the IR for the preserve_field_info intrinsic.
11966 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
11967 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
11968 {FieldAddr->getType()});
11969 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
11970 }
11971 case BPF::BI__builtin_btf_type_id:
11972 case BPF::BI__builtin_preserve_type_info: {
11973 if (!getDebugInfo()) {
11974 CGM.Error(E->getExprLoc(), "using builtin function without -g");
11975 return nullptr;
11976 }
11977
11978 const Expr *Arg0 = E->getArg(0);
11979 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
11980 Arg0->getType(), Arg0->getExprLoc());
11981
11982 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
11983 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
11984 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
11985
11986 llvm::Function *FnDecl;
11987 if (BuiltinID == BPF::BI__builtin_btf_type_id)
11988 FnDecl = llvm::Intrinsic::getDeclaration(
11989 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
11990 else
11991 FnDecl = llvm::Intrinsic::getDeclaration(
11992 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
11993 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
11994 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
11995 return Fn;
11996 }
11997 case BPF::BI__builtin_preserve_enum_value: {
11998 if (!getDebugInfo()) {
11999 CGM.Error(E->getExprLoc(), "using builtin function without -g");
12000 return nullptr;
12001 }
12002
12003 const Expr *Arg0 = E->getArg(0);
12004 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
12005 Arg0->getType(), Arg0->getExprLoc());
12006
12007 // Find enumerator
12008 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
12009 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
12010 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
12011 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
12012
12013 auto &InitVal = Enumerator->getInitVal();
12014 std::string InitValStr;
12015 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
12016 InitValStr = std::to_string(InitVal.getSExtValue());
12017 else
12018 InitValStr = std::to_string(InitVal.getZExtValue());
12019 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
12020 Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
12021
12022 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12023 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
12024 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
12025
12026 llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
12027 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
12028 CallInst *Fn =
12029 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
12030 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
12031 return Fn;
12032 }
12033 }
12034}
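// Illustrative usage, not part of the original file; the hypothetical struct
// and function below, and the field-info kind value, assume libbpf's
// encoding in which 1 denotes byte size:
//
//   struct task { int pid; };
//   unsigned pid_size(struct task *t) {
//     // Requires -g; lowers to llvm.bpf.preserve.field.info as built above.
//     return __builtin_preserve_field_info(t->pid, 1);
//   }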
12035
12036llvm::Value *CodeGenFunction::
12037 BuildVector(ArrayRef<llvm::Value*> Ops) {
12038 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
12039 "Not a power-of-two sized vector!");
12040 bool AllConstants = true;
12041 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
12042 AllConstants &= isa<Constant>(Ops[i]);
12043
12044 // If this is a constant vector, create a ConstantVector.
12045 if (AllConstants) {
12046 SmallVector<llvm::Constant*, 16> CstOps;
12047 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
12048 CstOps.push_back(cast<Constant>(Ops[i]));
12049 return llvm::ConstantVector::get(CstOps);
12050 }
12051
12052 // Otherwise, insertelement the values to build the vector.
12053 Value *Result = llvm::UndefValue::get(
12054 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
12055
12056 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
12057 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
12058
12059 return Result;
12060}
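// Illustrative sketch, not part of the original file: with all-constant
// operands BuildVector folds to an immediate ConstantVector; otherwise it
// degrades to an insertelement chain, e.g. for two i32 values:
//   %v0 = insertelement <2 x i32> undef, i32 %a, i32 0
//   %v1 = insertelement <2 x i32> %v0, i32 %b, i32 1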
12061
12062// Convert the mask from an integer type to a vector of i1.
12063 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
12064 unsigned NumElts) {
12065
12066 auto *MaskTy = llvm::FixedVectorType::get(
12067 CGF.Builder.getInt1Ty(),
12068 cast<IntegerType>(Mask->getType())->getBitWidth());
12069 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
12070
12071 // If we have fewer than 8 elements, then the starting mask was an i8 and
12072 // we need to extract down to the right number of elements.
12073 if (NumElts < 8) {
12074 int Indices[4];
12075 for (unsigned i = 0; i != NumElts; ++i)
12076 Indices[i] = i;
12077 MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
12078 makeArrayRef(Indices, NumElts),
12079 "extract");
12080 }
12081 return MaskVec;
12082}
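// Illustrative sketch, not part of the original file: for an i8 mask paired
// with a 4-element vector, getMaskVecValue emits:
//   %bits = bitcast i8 %mask to <8 x i1>
//   %m = shufflevector <8 x i1> %bits, <8 x i1> %bits,
//                      <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// so only the low NumElts bits of the integer mask participate.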
12083
12084 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
12085 Align Alignment) {
12086 // Cast the pointer to the right type.
12087 Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
12088 llvm::PointerType::getUnqual(Ops[1]->getType()));
12089
12090 Value *MaskVec = getMaskVecValue(
12091 CGF, Ops[2],
12092 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
12093
12094 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
12095}
12096
12097 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
12098 Align Alignment) {
12099 // Cast the pointer to the right type.
12100 llvm::Type *Ty = Ops[1]->getType();
12101 Value *Ptr =
12102 CGF.Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
12103
12104 Value *MaskVec = getMaskVecValue(
12105 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
12106
12107 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
12108}
12109
12110 static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
12111 ArrayRef<Value *> Ops) {
12112 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
12113 llvm::Type *PtrTy = ResultTy->getElementType();
12114
12115 // Cast the pointer to the element type.
12116 Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
12117 llvm::PointerType::getUnqual(PtrTy));
12118
12119 Value *MaskVec = getMaskVecValue(
12120 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
12121
12122 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
12123 ResultTy);
12124 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
12125}
12126
12127 static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
12128 ArrayRef<Value *> Ops,
12129 bool IsCompress) {
12130 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
12131
12132 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
12133
12134 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
12135 : Intrinsic::x86_avx512_mask_expand;
12136 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
12137 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
12138}
12139
12140 static Value *EmitX86CompressStore(CodeGenFunction &CGF,
12141 ArrayRef<Value *> Ops) {
12142 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
12143 llvm::Type *PtrTy = ResultTy->getElementType();
12144
12145 // Cast the pointer to the element type.
12146 Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
12147 llvm::PointerType::getUnqual(PtrTy));
12148
12149 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
12150
12151 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
12152 ResultTy);
12153 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
12154}
12155
12156static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
12157 ArrayRef<Value *> Ops,
12158 bool InvertLHS = false) {
12159 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
12160 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
12161 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
12162
12163 if (InvertLHS)
12164 LHS = CGF.Builder.CreateNot(LHS);
12165
12166 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
12167 Ops[0]->getType());
12168}
12169
12170 static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
12171 Value *Amt, bool IsRight) {
12172 llvm::Type *Ty = Op0->getType();
12173
12174 // The amount may be a scalar immediate, in which case create a splat vector.
12175 // Funnel shift amounts are treated as modulo the bit width, and types are all
12176 // power-of-2, so we only care about the lowest log2 bits anyway.
12177 if (Amt->getType() != Ty) {
12178 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
12179 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
12180 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
12181 }
12182
12183 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
12184 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
12185 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
12186}
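// Illustrative note, not part of the original file: llvm.fshl conceptually
// concatenates Op0:Op1, shifts the pair left by Amt modulo the element bit
// width, and keeps the high half (llvm.fshr shifts right and keeps the low
// half). After splatting a scalar immediate amount, a call looks like:
//   %r = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %hi, <8 x i16> %lo,
//                                        <8 x i16> %amt)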
12187
12188 static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
12189 bool IsSigned) {
12190 Value *Op0 = Ops[0];
12191 Value *Op1 = Ops[1];
12192 llvm::Type *Ty = Op0->getType();
12193 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
12194
12195 CmpInst::Predicate Pred;
12196 switch (Imm) {
12197 case 0x0:
12198 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
12199 break;
12200 case 0x1:
12201 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
12202 break;
12203 case 0x2:
12204 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
12205 break;
12206 case 0x3:
12207 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
12208 break;
12209 case 0x4:
12210 Pred = ICmpInst::ICMP_EQ;
12211 break;
12212 case 0x5:
12213 Pred = ICmpInst::ICMP_NE;
12214 break;
12215 case 0x6:
12216 return llvm::Constant::getNullValue(Ty); // FALSE
12217 case 0x7:
12218 return llvm::Constant::getAllOnesValue(Ty); // TRUE
12219 default:
12220 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
12221 }
12222
12223 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
12224 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
12225 return Res;
12226}
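// Worked example, illustrative and not part of the original file: for the
// XOP builtin behind _mm_com_epi32(a, b, 0x2) the switch selects ICMP_SGT,
// so the lowering is a plain vector compare plus sign extension:
//   %c = icmp sgt <4 x i32> %a, %b
//   %r = sext <4 x i1> %c to <4 x i32>
// Immediates 0x6 and 0x7 fold directly to all-zeros and all-ones constants.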
12227
12228 static Value *EmitX86Select(CodeGenFunction &CGF,
12229 Value *Mask, Value *Op0, Value *Op1) {
12230
12231 // If the mask is all ones, just return the first argument.
12232 if (const auto *C = dyn_cast<Constant>(Mask))
12233 if (C->isAllOnesValue())
12234 return Op0;
12235
12236 Mask = getMaskVecValue(
12237 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
12238
12239 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
12240}
12241
12242 static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
12243 Value *Mask, Value *Op0, Value *Op1) {
12244 // If the mask is all ones, just return the first argument.
12245 if (const auto *C = dyn_cast<Constant>(Mask))
12246 if (C->isAllOnesValue())
12247 return Op0;
12248
12249 auto *MaskTy = llvm::FixedVectorType::get(
12250 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
12251 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
12252 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
12253 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
12254}
12255
12256 static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
12257 unsigned NumElts, Value *MaskIn) {
12258 if (MaskIn) {
12259 const auto *C = dyn_cast<Constant>(MaskIn);
12260 if (!C || !C->isAllOnesValue())
12261 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
12262 }
12263
12264 if (NumElts < 8) {
12265 int Indices[8];
12266 for (unsigned i = 0; i != NumElts; ++i)
12267 Indices[i] = i;
12268 for (unsigned i = NumElts; i != 8; ++i)
12269 Indices[i] = i % NumElts + NumElts;
12270 Cmp = CGF.Builder.CreateShuffleVector(
12271 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
12272 }
12273
12274 return CGF.Builder.CreateBitCast(Cmp,
12275 IntegerType::get(CGF.getLLVMContext(),
12276 std::max(NumElts, 8U)));
12277}
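// Illustrative sketch, not part of the original file: AVX-512 mask values
// are at least 8 bits wide, so a 4-lane compare result is padded before the
// final bitcast. With NumElts == 4 the shuffle above selects lanes {0,1,2,3}
// from Cmp and {4,5,6,7} from the zero vector, then:
//   %k = bitcast <8 x i1> %padded to i8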
12278
12279 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
12280 bool Signed, ArrayRef<Value *> Ops) {
12281 assert((Ops.size() == 2 || Ops.size() == 4) &&
12282 "Unexpected number of arguments");
12283 unsigned NumElts =
12284 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
12285 Value *Cmp;
12286
12287 if (CC == 3) {
12288 Cmp = Constant::getNullValue(
12289 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
12290 } else if (CC == 7) {
12291 Cmp = Constant::getAllOnesValue(
12292 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
12293 } else {
12294 ICmpInst::Predicate Pred;
12295 switch (CC) {
12296 default: llvm_unreachable("Unknown condition code");
12297 case 0: Pred = ICmpInst::ICMP_EQ; break;
12298 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
12299 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
12300 case 4: Pred = ICmpInst::ICMP_NE; break;
12301 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
12302 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
12303 }
12304 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
12305 }
12306
12307 Value *MaskIn = nullptr;
12308 if (Ops.size() == 4)
12309 MaskIn = Ops[3];
12310
12311 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
12312}
12313
12314 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
12315 Value *Zero = Constant::getNullValue(In->getType());
12316 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
12317}
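// Illustrative note, not part of the original file: converting a vector to a
// mask is a per-lane sign-bit test; CC == 1 with Signed == true selects
// ICMP_SLT, so each mask bit is set exactly when the corresponding element
// is negative (element < 0).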
12318
12319 static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
12320 ArrayRef<Value *> Ops, bool IsSigned) {
12321 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
12322 llvm::Type *Ty = Ops[1]->getType();
12323
12324 Value *Res;
12325 if (Rnd != 4) {
12326 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
12327 : Intrinsic::x86_avx512_uitofp_round;
12328 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
12329 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
12330 } else {
12331 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
12332 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
12333 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
12334 }
12335
12336 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
12337}
12338
12339// Lowers X86 FMA intrinsics to IR.
12340 static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
12341 ArrayRef<Value *> Ops, unsigned BuiltinID,
12342 bool IsAddSub) {
12343
12344 bool Subtract = false;
12345 Intrinsic::ID IID = Intrinsic::not_intrinsic;
12346 switch (BuiltinID) {
12347 default: break;
12348 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
12349 Subtract = true;
12350 LLVM_FALLTHROUGH;
12351 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
12352 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
12353 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
12354 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
12355 break;
12356 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
12357 Subtract = true;
12358 LLVM_FALLTHROUGH;
12359 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
12360 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
12361 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
12362 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
12363 break;
12364 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
12365 Subtract = true;
12366 LLVM_FALLTHROUGH;
12367 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
12368 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
12369 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
12370 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
12371 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
12372 Subtract = true;
12373 LLVM_FALLTHROUGH;
12374 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
12375 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
12376 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
12377 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
12378 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
12379 Subtract = true;
12380 LLVM_FALLTHROUGH;
12381 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
12382 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
12383 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
12384 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
12385 break;
12386 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
12387 Subtract = true;
12388 LLVM_FALLTHROUGH;
12389 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
12390 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
12391 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
12392 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
12393 break;
12394 }
12395
12396 Value *A = Ops[0];
12397 Value *B = Ops[1];
12398 Value *C = Ops[2];
12399
12400 if (Subtract)
12401 C = CGF.Builder.CreateFNeg(C);
12402
12403 Value *Res;
12404
12405 // Use the target intrinsic unless rounding is _MM_FROUND_CUR_DIRECTION (4) and this is not an addsub form.
12406 if (IID != Intrinsic::not_intrinsic &&
12407 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
12408 IsAddSub)) {
12409 Function *Intr = CGF.CGM.getIntrinsic(IID);
12410 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
12411 } else {
12412 llvm::Type *Ty = A->getType();
12413 Function *FMA;
12414 if (CGF.Builder.getIsFPConstrained()) {
12415 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
12416 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
12417 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
12418 } else {
12419 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
12420 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
12421 }
12422 }
12423
12424 // Handle any required masking.
12425 Value *MaskFalseVal = nullptr;
12426 switch (BuiltinID) {
12427 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
12428 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
12429 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
12430 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
12431 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
12432 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
12433 MaskFalseVal = Ops[0];
12434 break;
12435 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
12436 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
12437 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
12438 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
12439 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
12440 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
12441 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
12442 break;
12443 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
12444 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
12445 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
12446 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
12447 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
12448 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
12449 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
12450 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
12451 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
12452 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
12453 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
12454 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
12455 MaskFalseVal = Ops[2];
12456 break;
12457 }
12458
12459 if (MaskFalseVal)
12460 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
12461
12462 return Res;
12463}
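//===-- Editorial sketch (illustrative, not part of CGBuiltin.cpp) -------===//
// The masking clause above reduces every 512-bit FMA builtin to
// select(mask, fma(a, b, c), passthru), where the passthru is the first
// source (_mask), zero (_maskz), or the accumulator (_mask3). A minimal
// scalar model of the _mask form, assuming 16 float lanes (the function
// name is hypothetical):
#include <cmath>
static void fmadd_ps512_mask_model(float Out[16], const float A[16],
                                   const float B[16], const float C[16],
                                   unsigned short Mask) {
  for (int I = 0; I != 16; ++I)
    Out[I] = ((Mask >> I) & 1) ? std::fma(A[I], B[I], C[I]) : A[I];
}
//===----------------------------------------------------------------------===//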
12464
12465static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
12466 MutableArrayRef<Value *> Ops, Value *Upper,
12467 bool ZeroMask = false, unsigned PTIdx = 0,
12468 bool NegAcc = false) {
12469 unsigned Rnd = 4;
12470 if (Ops.size() > 4)
12471 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
12472
12473 if (NegAcc)
12474 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
12475
12476 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
12477 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
12478 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
12479 Value *Res;
12480 if (Rnd != 4) {
12481 Intrinsic::ID IID;
12482
12483 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
12484 case 16:
12485 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
12486 break;
12487 case 32:
12488 IID = Intrinsic::x86_avx512_vfmadd_f32;
12489 break;
12490 case 64:
12491 IID = Intrinsic::x86_avx512_vfmadd_f64;
12492 break;
12493 default:
12494 llvm_unreachable("Unexpected size");
12495 }
12496 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
12497 {Ops[0], Ops[1], Ops[2], Ops[4]});
12498 } else if (CGF.Builder.getIsFPConstrained()) {
12499 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
12500 Function *FMA = CGF.CGM.getIntrinsic(
12501 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
12502 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
12503 } else {
12504 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
12505 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
12506 }
12507 // If we have more than 3 arguments, we need to do masking.
12508 if (Ops.size() > 3) {
12509 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
12510 : Ops[PTIdx];
12511
12512 // If we negated the accumulator and it's the PassThru value, we need to
12513 // bypass the negate. Conveniently, Upper should be the same thing in this
12514 // case.
12515 if (NegAcc && PTIdx == 2)
12516 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
12517
12518 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
12519 }
12520 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
12521}
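//===-- Editorial sketch (illustrative, not part of CGBuiltin.cpp) -------===//
// EmitScalarFMAExpr only computes lane 0; the remaining lanes of `Upper`
// are reinserted untouched. A scalar model for the unmasked ss form,
// assuming 4 float lanes (names are hypothetical):
#include <cmath>
static void fmadd_ss_model(float Out[4], const float A[4], const float B[4],
                           const float C[4]) {
  Out[0] = std::fma(A[0], B[0], C[0]); // extract lane 0, fma, reinsert
  for (int I = 1; I != 4; ++I)
    Out[I] = A[I];                     // upper lanes pass through from A
}
//===----------------------------------------------------------------------===//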
12522
12523static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
12524 ArrayRef<Value *> Ops) {
12525 llvm::Type *Ty = Ops[0]->getType();
12526 // Arguments have a vXi32 type so cast to vXi64.
12527 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
12528 Ty->getPrimitiveSizeInBits() / 64);
12529 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
12530 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
12531
12532 if (IsSigned) {
12533 // Shift left then arithmetic shift right.
12534 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
12535 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
12536 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
12537 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
12538 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
12539 } else {
12540 // Clear the upper bits.
12541 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
12542 LHS = CGF.Builder.CreateAnd(LHS, Mask);
12543 RHS = CGF.Builder.CreateAnd(RHS, Mask);
12544 }
12545
12546 return CGF.Builder.CreateMul(LHS, RHS);
12547}
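//===-- Editorial sketch (illustrative, not part of CGBuiltin.cpp) -------===//
// Per 64-bit lane, EmitX86Muldq implements pmuldq/pmuludq: multiply the low
// 32 bits of each lane after sign- or zero-extension. A one-lane model
// (hypothetical name):
#include <cstdint>
static int64_t muldq_lane_model(uint64_t LHS, uint64_t RHS, bool IsSigned) {
  if (IsSigned) // the shl+ashr by 32 above is exactly this sign-extension
    return int64_t(int32_t(LHS)) * int64_t(int32_t(RHS));
  // The unsigned form masks with 0xffffffff to zero-extend instead.
  return int64_t((LHS & 0xffffffff) * (RHS & 0xffffffff));
}
//===----------------------------------------------------------------------===//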
12548
12549// Emit a masked pternlog intrinsic. This only exists because the header has to
12550// use a macro and we aren't able to pass the input argument to a pternlog
12551// builtin and a select builtin without evaluating it twice.
12552static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
12553 ArrayRef<Value *> Ops) {
12554 llvm::Type *Ty = Ops[0]->getType();
12555
12556 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
12557 unsigned EltWidth = Ty->getScalarSizeInBits();
12558 Intrinsic::ID IID;
12559 if (VecWidth == 128 && EltWidth == 32)
12560 IID = Intrinsic::x86_avx512_pternlog_d_128;
12561 else if (VecWidth == 256 && EltWidth == 32)
12562 IID = Intrinsic::x86_avx512_pternlog_d_256;
12563 else if (VecWidth == 512 && EltWidth == 32)
12564 IID = Intrinsic::x86_avx512_pternlog_d_512;
12565 else if (VecWidth == 128 && EltWidth == 64)
12566 IID = Intrinsic::x86_avx512_pternlog_q_128;
12567 else if (VecWidth == 256 && EltWidth == 64)
12568 IID = Intrinsic::x86_avx512_pternlog_q_256;
12569 else if (VecWidth == 512 && EltWidth == 64)
12570 IID = Intrinsic::x86_avx512_pternlog_q_512;
12571 else
12572 llvm_unreachable("Unexpected intrinsic");
12573
12574 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
12575 Ops.drop_back());
12576 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
12577 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
12578}
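//===-- Editorial sketch (illustrative, not part of CGBuiltin.cpp) -------===//
// vpternlog treats the 8-bit immediate as a truth table: at each bit
// position the three source bits form an index 0-7 that selects one bit of
// the immediate. A 32-bit scalar model (hypothetical name):
#include <cstdint>
static uint32_t ternlog_model(uint32_t A, uint32_t B, uint32_t C,
                              uint8_t Imm) {
  uint32_t Res = 0;
  for (unsigned Bit = 0; Bit != 32; ++Bit) {
    unsigned Idx = (((A >> Bit) & 1) << 2) | (((B >> Bit) & 1) << 1) |
                   ((C >> Bit) & 1);
    Res |= uint32_t((Imm >> Idx) & 1) << Bit;
  }
  return Res; // e.g. Imm = 0xE8 yields the majority function maj(A, B, C)
}
//===----------------------------------------------------------------------===//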
12579
12580static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
12581 llvm::Type *DstTy) {
12582 unsigned NumberOfElements =
12583 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
12584 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
12585 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
12586}
12587
12588Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
12589 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
12590 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
12591 return EmitX86CpuIs(CPUStr);
12592}
12593
12594// Convert F16 halves to floats.
12595static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
12596 ArrayRef<Value *> Ops,
12597 llvm::Type *DstTy) {
12598 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
12599 "Unknown cvtph2ps intrinsic");
12600
12601 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
12602 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
12603 Function *F =
12604 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
12605 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
12606 }
12607
12608 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
12609 Value *Src = Ops[0];
12610
12611 // Extract the subvector.
12612 if (NumDstElts !=
12613 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
12614 assert(NumDstElts == 4 && "Unexpected vector size");
12615 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
12616 }
12617
12618 // Bitcast from vXi16 to vXf16.
12619 auto *HalfTy = llvm::FixedVectorType::get(
12620 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
12621 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
12622
12623 // Perform the fp-extension.
12624 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
12625
12626 if (Ops.size() >= 3)
12627 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
12628 return Res;
12629}
12630
12631// Convert a BF16 to a float.
12632static Value *EmitX86CvtBF16ToFloatExpr(CodeGenFunction &CGF,
12633 const CallExpr *E,
12634 ArrayRef<Value *> Ops) {
12635 llvm::Type *Int32Ty = CGF.Builder.getInt32Ty();
12636 Value *ZeroExt = CGF.Builder.CreateZExt(Ops[0], Int32Ty);
12637 Value *Shl = CGF.Builder.CreateShl(ZeroExt, 16);
12638 llvm::Type *ResultType = CGF.ConvertType(E->getType());
12639 Value *BitCast = CGF.Builder.CreateBitCast(Shl, ResultType);
12640 return BitCast;
12641}
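//===-- Editorial sketch (illustrative, not part of CGBuiltin.cpp) -------===//
// A bfloat16 is the upper half of a float, so the zext/shl-by-16/bitcast
// sequence above is lossless. Scalar equivalent (hypothetical name):
#include <cstdint>
#include <cstring>
static float bf16_to_float_model(uint16_t BF16) {
  uint32_t Bits = uint32_t(BF16) << 16; // zero-extend, shift into high half
  float F;
  std::memcpy(&F, &Bits, sizeof(F));    // the CreateBitCast step
  return F;
}
//===----------------------------------------------------------------------===//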
12642
12643Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
12644
12645 llvm::Type *Int32Ty = Builder.getInt32Ty();
12646
12647 // Matching the struct layout from the compiler-rt/libgcc structure that is
12648 // filled in:
12649 // unsigned int __cpu_vendor;
12650 // unsigned int __cpu_type;
12651 // unsigned int __cpu_subtype;
12652 // unsigned int __cpu_features[1];
12653 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
12654 llvm::ArrayType::get(Int32Ty, 1));
12655
12656 // Grab the global __cpu_model.
12657 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
12658 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
12659
12660 // Calculate the index needed to access the correct field based on the
12661 // range. Also adjust the expected value.
12662 unsigned Index;
12663 unsigned Value;
12664 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
12665#define X86_VENDOR(ENUM, STRING) \
12666 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
12667#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
12668 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
12669#define X86_CPU_TYPE(ENUM, STR) \
12670 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
12671#define X86_CPU_SUBTYPE(ENUM, STR) \
12672 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
12673#include "llvm/Support/X86TargetParser.def"
12674 .Default({0, 0});
12675 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
12676
12677 // Grab the appropriate field from __cpu_model.
12678 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
12679 ConstantInt::get(Int32Ty, Index)};
12680 llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
12681 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
12682 CharUnits::fromQuantity(4));
12683
12684 // Check the value of the field against the requested value.
12685 return Builder.CreateICmpEQ(CpuValue,
12686 llvm::ConstantInt::get(Int32Ty, Value));
12687}
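//===-- Editorial sketch (illustrative, not part of CGBuiltin.cpp) -------===//
// What this lowers to: a load of one __cpu_model field plus an integer
// compare. Typical source-level use, assuming a GCC/Clang x86 target (the
// runtime data must have been initialized, e.g. via __builtin_cpu_init):
#include <cstdio>
int main() {
  __builtin_cpu_init();
  if (__builtin_cpu_is("skylake")) // matches a __cpu_type/__cpu_subtype row
    std::puts("running on Skylake");
}
//===----------------------------------------------------------------------===//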
12688
12689Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
12690 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
12691 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
12692 return EmitX86CpuSupports(FeatureStr);
12693}
12694
12695Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
12696 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
12697}
12698
12699llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
12700 uint32_t Features1 = Lo_32(FeaturesMask);
12701 uint32_t Features2 = Hi_32(FeaturesMask);
12702
12703 Value *Result = Builder.getTrue();
12704
12705 if (Features1 != 0) {
12706 // Matching the struct layout from the compiler-rt/libgcc structure that is
12707 // filled in:
12708 // unsigned int __cpu_vendor;
12709 // unsigned int __cpu_type;
12710 // unsigned int __cpu_subtype;
12711 // unsigned int __cpu_features[1];
12712 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
12713 llvm::ArrayType::get(Int32Ty, 1));
12714
12715 // Grab the global __cpu_model.
12716 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
12717 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
12718
12719 // Grab the first (0th) element from the field __cpu_features off of the
12720 // global in the struct STy.
12721 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
12722 Builder.getInt32(0)};
12723 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
12724 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
12725 CharUnits::fromQuantity(4));
12726
12727 // Check the value of the bit corresponding to the feature requested.
12728 Value *Mask = Builder.getInt32(Features1);
12729 Value *Bitset = Builder.CreateAnd(Features, Mask);
12730 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
12731 Result = Builder.CreateAnd(Result, Cmp);
12732 }
12733
12734 if (Features2 != 0) {
12735 llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty,
12736 "__cpu_features2");
12737 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
12738
12739 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures2,
12740 CharUnits::fromQuantity(4));
12741
12742 // Check the value of the bit corresponding to the feature requested.
12743 Value *Mask = Builder.getInt32(Features2);
12744 Value *Bitset = Builder.CreateAnd(Features, Mask);
12745 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
12746 Result = Builder.CreateAnd(Result, Cmp);
12747 }
12748
12749 return Result;
12750}
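//===-- Editorial sketch (illustrative, not part of CGBuiltin.cpp) -------===//
// The generated check is (Features & Mask) == Mask per 32-bit word, i.e.
// *all* requested bits must be set, with the low word read from
// __cpu_model.__cpu_features[0] and the high word from __cpu_features2.
// A scalar model of the combined test (hypothetical name):
#include <cstdint>
static bool cpu_supports_model(uint32_t Word1, uint32_t Word2,
                               uint64_t FeaturesMask) {
  uint32_t Lo = uint32_t(FeaturesMask);       // Lo_32
  uint32_t Hi = uint32_t(FeaturesMask >> 32); // Hi_32
  bool Result = true;                         // Builder.getTrue()
  if (Lo) Result &= (Word1 & Lo) == Lo;
  if (Hi) Result &= (Word2 & Hi) == Hi;
  return Result;
}
//===----------------------------------------------------------------------===//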
12751
12752Value *CodeGenFunction::EmitX86CpuInit() {
12753 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
12754 /*Variadic*/ false);
12755 llvm::FunctionCallee Func =
12756 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
12757 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
12758 cast<llvm::GlobalValue>(Func.getCallee())
12759 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
12760 return Builder.CreateCall(Func);
12761}
12762
12763Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
12764 const CallExpr *E) {
12765 if (BuiltinID == X86::BI__builtin_cpu_is)
12766 return EmitX86CpuIs(E);
12767 if (BuiltinID == X86::BI__builtin_cpu_supports)
12768 return EmitX86CpuSupports(E);
12769 if (BuiltinID == X86::BI__builtin_cpu_init)
12770 return EmitX86CpuInit();
12771
12772 // Handle MSVC intrinsics before argument evaluation to prevent double
12773 // evaluation.
12774 if (Optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
12775 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
12776
12777 SmallVector<Value*, 4> Ops;
12778 bool IsMaskFCmp = false;
12779 bool IsConjFMA = false;
12780
12781 // Find out if any arguments are required to be integer constant expressions.
12782 unsigned ICEArguments = 0;
12783 ASTContext::GetBuiltinTypeError Error;
12784 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
12785 assert(Error == ASTContext::GE_None && "Should not codegen an error");
12786
12787 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
12788 // If this is a normal argument, just emit it as a scalar.
12789 if ((ICEArguments & (1 << i)) == 0) {
12790 Ops.push_back(EmitScalarExpr(E->getArg(i)));
12791 continue;
12792 }
12793
12794 // If this is required to be a constant, constant fold it so that we know
12795 // that the generated intrinsic gets a ConstantInt.
12796 Ops.push_back(llvm::ConstantInt::get(
12797 getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
12798 }
12799
12800 // These exist so that the builtin that takes an immediate can be bounds
12801 // checked by clang to avoid passing bad immediates to the backend. Since
12802 // AVX has a larger immediate than SSE we would need separate builtins to
12803 // do the different bounds checking. Rather than create a clang-specific
12804 // SSE-only builtin, this implements eight separate builtins to match the
12805 // gcc implementation.
12806 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
12807 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
12808 llvm::Function *F = CGM.getIntrinsic(ID);
12809 return Builder.CreateCall(F, Ops);
12810 };
12811
12812 // For the vector forms of FP comparisons, translate the builtins directly to
12813 // IR.
12814 // TODO: The builtins could be removed if the SSE header files used vector
12815 // extension comparisons directly (vector ordered/unordered may need
12816 // additional support via __builtin_isnan()).
12817 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
12818 bool IsSignaling) {
12819 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
12820 Value *Cmp;
12821 if (IsSignaling)
12822 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
12823 else
12824 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
12825 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
12826 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
12827 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
12828 return Builder.CreateBitCast(Sext, FPVecTy);
12829 };
12830
12831 switch (BuiltinID) {
12832 default: return nullptr;
12833 case X86::BI_mm_prefetch: {
12834 Value *Address = Ops[0];
12835 ConstantInt *C = cast<ConstantInt>(Ops[1]);
12836 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
12837 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
12838 Value *Data = ConstantInt::get(Int32Ty, 1);
12839 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
12840 return Builder.CreateCall(F, {Address, RW, Locality, Data});
12841 }
12842 case X86::BI_mm_clflush: {
12843 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
12844 Ops[0]);
12845 }
12846 case X86::BI_mm_lfence: {
12847 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
12848 }
12849 case X86::BI_mm_mfence: {
12850 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
12851 }
12852 case X86::BI_mm_sfence: {
12853 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
12854 }
12855 case X86::BI_mm_pause: {
12856 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
12857 }
12858 case X86::BI__rdtsc: {
12859 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
12860 }
12861 case X86::BI__builtin_ia32_rdtscp: {
12862 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
12863 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
12864 Ops[0]);
12865 return Builder.CreateExtractValue(Call, 0);
12866 }
12867 case X86::BI__builtin_ia32_lzcnt_u16:
12868 case X86::BI__builtin_ia32_lzcnt_u32:
12869 case X86::BI__builtin_ia32_lzcnt_u64: {
12870 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
12871 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
12872 }
12873 case X86::BI__builtin_ia32_tzcnt_u16:
12874 case X86::BI__builtin_ia32_tzcnt_u32:
12875 case X86::BI__builtin_ia32_tzcnt_u64: {
12876 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
12877 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
12878 }
12879 case X86::BI__builtin_ia32_undef128:
12880 case X86::BI__builtin_ia32_undef256:
12881 case X86::BI__builtin_ia32_undef512:
12882 // The x86 definition of "undef" is not the same as the LLVM definition
12883 // (PR32176). We leave optimizing away an unnecessary zero constant to the
12884 // IR optimizer and backend.
12885 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
12886 // value, we should use that here instead of a zero.
12887 return llvm::Constant::getNullValue(ConvertType(E->getType()));
12888 case X86::BI__builtin_ia32_vec_init_v8qi:
12889 case X86::BI__builtin_ia32_vec_init_v4hi:
12890 case X86::BI__builtin_ia32_vec_init_v2si:
12891 return Builder.CreateBitCast(BuildVector(Ops),
12892 llvm::Type::getX86_MMXTy(getLLVMContext()));
12893 case X86::BI__builtin_ia32_vec_ext_v2si:
12894 case X86::BI__builtin_ia32_vec_ext_v16qi:
12895 case X86::BI__builtin_ia32_vec_ext_v8hi:
12896 case X86::BI__builtin_ia32_vec_ext_v4si:
12897 case X86::BI__builtin_ia32_vec_ext_v4sf:
12898 case X86::BI__builtin_ia32_vec_ext_v2di:
12899 case X86::BI__builtin_ia32_vec_ext_v32qi:
12900 case X86::BI__builtin_ia32_vec_ext_v16hi:
12901 case X86::BI__builtin_ia32_vec_ext_v8si:
12902 case X86::BI__builtin_ia32_vec_ext_v4di: {
12903 unsigned NumElts =
12904 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
12905 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
12906 Index &= NumElts - 1;
12907 // These builtins exist so we can ensure the index is an ICE and in range.
12908 // Otherwise we could just do this in the header file.
12909 return Builder.CreateExtractElement(Ops[0], Index);
12910 }
12911 case X86::BI__builtin_ia32_vec_set_v16qi:
12912 case X86::BI__builtin_ia32_vec_set_v8hi:
12913 case X86::BI__builtin_ia32_vec_set_v4si:
12914 case X86::BI__builtin_ia32_vec_set_v2di:
12915 case X86::BI__builtin_ia32_vec_set_v32qi:
12916 case X86::BI__builtin_ia32_vec_set_v16hi:
12917 case X86::BI__builtin_ia32_vec_set_v8si:
12918 case X86::BI__builtin_ia32_vec_set_v4di: {
12919 unsigned NumElts =
12920 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
12921 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
12922 Index &= NumElts - 1;
12923 // These builtins exist so we can ensure the index is an ICE and in range.
12924 // Otherwise we could just do this in the header file.
12925 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
12926 }
12927 case X86::BI_mm_setcsr:
12928 case X86::BI__builtin_ia32_ldmxcsr: {
12929 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
12930 Builder.CreateStore(Ops[0], Tmp);
12931 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
12932 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
12933 }
12934 case X86::BI_mm_getcsr:
12935 case X86::BI__builtin_ia32_stmxcsr: {
12936 Address Tmp = CreateMemTemp(E->getType());
12937 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
12938 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
12939 return Builder.CreateLoad(Tmp, "stmxcsr");
12940 }
12941 case X86::BI__builtin_ia32_xsave:
12942 case X86::BI__builtin_ia32_xsave64:
12943 case X86::BI__builtin_ia32_xrstor:
12944 case X86::BI__builtin_ia32_xrstor64:
12945 case X86::BI__builtin_ia32_xsaveopt:
12946 case X86::BI__builtin_ia32_xsaveopt64:
12947 case X86::BI__builtin_ia32_xrstors:
12948 case X86::BI__builtin_ia32_xrstors64:
12949 case X86::BI__builtin_ia32_xsavec:
12950 case X86::BI__builtin_ia32_xsavec64:
12951 case X86::BI__builtin_ia32_xsaves:
12952 case X86::BI__builtin_ia32_xsaves64:
12953 case X86::BI__builtin_ia32_xsetbv:
12954 case X86::BI_xsetbv: {
12955 Intrinsic::ID ID;
12956#define INTRINSIC_X86_XSAVE_ID(NAME) \
12957 case X86::BI__builtin_ia32_##NAME: \
12958 ID = Intrinsic::x86_##NAME; \
12959 break
12960 switch (BuiltinID) {
12961 default: llvm_unreachable("Unsupported intrinsic!");
12962 INTRINSIC_X86_XSAVE_ID(xsave);
12963 INTRINSIC_X86_XSAVE_ID(xsave64);
12964 INTRINSIC_X86_XSAVE_ID(xrstor);
12965 INTRINSIC_X86_XSAVE_ID(xrstor64);
12966 INTRINSIC_X86_XSAVE_ID(xsaveopt);
12967 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
12968 INTRINSIC_X86_XSAVE_ID(xrstors);
12969 INTRINSIC_X86_XSAVE_ID(xrstors64);
12970 INTRINSIC_X86_XSAVE_ID(xsavec);
12971 INTRINSIC_X86_XSAVE_ID(xsavec64);
12972 INTRINSIC_X86_XSAVE_ID(xsaves);
12973 INTRINSIC_X86_XSAVE_ID(xsaves64);
12974 INTRINSIC_X86_XSAVE_ID(xsetbv);
12975 case X86::BI_xsetbv:
12976 ID = Intrinsic::x86_xsetbv;
12977 break;
12978 }
12979#undef INTRINSIC_X86_XSAVE_ID
12980 Value *Mhi = Builder.CreateTrunc(
12981 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
12982 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
12983 Ops[1] = Mhi;
12984 Ops.push_back(Mlo);
12985 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
12986 }
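//===-- Editorial sketch (illustrative, not part of CGBuiltin.cpp) -------===//
// The xsave/xrstor/xsetbv instructions take the 64-bit state mask split
// across EDX:EAX, which is why Ops[1] is replaced by its high half and the
// low half is appended. Scalar model of the split (hypothetical name):
#include <cstdint>
static void split_xcr_mask(uint64_t Mask, uint32_t &Mhi, uint32_t &Mlo) {
  Mhi = uint32_t(Mask >> 32); // ends up in EDX
  Mlo = uint32_t(Mask);       // ends up in EAX
}
//===----------------------------------------------------------------------===//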
12987 case X86::BI__builtin_ia32_xgetbv:
12988 case X86::BI_xgetbv:
12989 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
12990 case X86::BI__builtin_ia32_storedqudi128_mask:
12991 case X86::BI__builtin_ia32_storedqusi128_mask:
12992 case X86::BI__builtin_ia32_storedquhi128_mask:
12993 case X86::BI__builtin_ia32_storedquqi128_mask:
12994 case X86::BI__builtin_ia32_storeupd128_mask:
12995 case X86::BI__builtin_ia32_storeups128_mask:
12996 case X86::BI__builtin_ia32_storedqudi256_mask:
12997 case X86::BI__builtin_ia32_storedqusi256_mask:
12998 case X86::BI__builtin_ia32_storedquhi256_mask:
12999 case X86::BI__builtin_ia32_storedquqi256_mask:
13000 case X86::BI__builtin_ia32_storeupd256_mask:
13001 case X86::BI__builtin_ia32_storeups256_mask:
13002 case X86::BI__builtin_ia32_storedqudi512_mask:
13003 case X86::BI__builtin_ia32_storedqusi512_mask:
13004 case X86::BI__builtin_ia32_storedquhi512_mask:
13005 case X86::BI__builtin_ia32_storedquqi512_mask:
13006 case X86::BI__builtin_ia32_storeupd512_mask:
13007 case X86::BI__builtin_ia32_storeups512_mask:
13008 return EmitX86MaskedStore(*this, Ops, Align(1));
13009
13010 case X86::BI__builtin_ia32_storesh128_mask:
13011 case X86::BI__builtin_ia32_storess128_mask:
13012 case X86::BI__builtin_ia32_storesd128_mask:
13013 return EmitX86MaskedStore(*this, Ops, Align(1));
13014
13015 case X86::BI__builtin_ia32_vpopcntb_128:
13016 case X86::BI__builtin_ia32_vpopcntd_128:
13017 case X86::BI__builtin_ia32_vpopcntq_128:
13018 case X86::BI__builtin_ia32_vpopcntw_128:
13019 case X86::BI__builtin_ia32_vpopcntb_256:
13020 case X86::BI__builtin_ia32_vpopcntd_256:
13021 case X86::BI__builtin_ia32_vpopcntq_256:
13022 case X86::BI__builtin_ia32_vpopcntw_256:
13023 case X86::BI__builtin_ia32_vpopcntb_512:
13024 case X86::BI__builtin_ia32_vpopcntd_512:
13025 case X86::BI__builtin_ia32_vpopcntq_512:
13026 case X86::BI__builtin_ia32_vpopcntw_512: {
13027 llvm::Type *ResultType = ConvertType(E->getType());
13028 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
13029 return Builder.CreateCall(F, Ops);
13030 }
13031 case X86::BI__builtin_ia32_cvtmask2b128:
13032 case X86::BI__builtin_ia32_cvtmask2b256:
13033 case X86::BI__builtin_ia32_cvtmask2b512:
13034 case X86::BI__builtin_ia32_cvtmask2w128:
13035 case X86::BI__builtin_ia32_cvtmask2w256:
13036 case X86::BI__builtin_ia32_cvtmask2w512:
13037 case X86::BI__builtin_ia32_cvtmask2d128:
13038 case X86::BI__builtin_ia32_cvtmask2d256:
13039 case X86::BI__builtin_ia32_cvtmask2d512:
13040 case X86::BI__builtin_ia32_cvtmask2q128:
13041 case X86::BI__builtin_ia32_cvtmask2q256:
13042 case X86::BI__builtin_ia32_cvtmask2q512:
13043 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
13044
13045 case X86::BI__builtin_ia32_cvtb2mask128:
13046 case X86::BI__builtin_ia32_cvtb2mask256:
13047 case X86::BI__builtin_ia32_cvtb2mask512:
13048 case X86::BI__builtin_ia32_cvtw2mask128:
13049 case X86::BI__builtin_ia32_cvtw2mask256:
13050 case X86::BI__builtin_ia32_cvtw2mask512:
13051 case X86::BI__builtin_ia32_cvtd2mask128:
13052 case X86::BI__builtin_ia32_cvtd2mask256:
13053 case X86::BI__builtin_ia32_cvtd2mask512:
13054 case X86::BI__builtin_ia32_cvtq2mask128:
13055 case X86::BI__builtin_ia32_cvtq2mask256:
13056 case X86::BI__builtin_ia32_cvtq2mask512:
13057 return EmitX86ConvertToMask(*this, Ops[0]);
13058
13059 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
13060 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
13061 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
13062 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
13063 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
13064 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
13065 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
13066 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
13067 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
13068 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
13069 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
13070 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
13071 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
13072 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
13073
13074 case X86::BI__builtin_ia32_vfmaddss3:
13075 case X86::BI__builtin_ia32_vfmaddsd3:
13076 case X86::BI__builtin_ia32_vfmaddsh3_mask:
13077 case X86::BI__builtin_ia32_vfmaddss3_mask:
13078 case X86::BI__builtin_ia32_vfmaddsd3_mask:
13079 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
13080 case X86::BI__builtin_ia32_vfmaddss:
13081 case X86::BI__builtin_ia32_vfmaddsd:
13082 return EmitScalarFMAExpr(*this, E, Ops,
13083 Constant::getNullValue(Ops[0]->getType()));
13084 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
13085 case X86::BI__builtin_ia32_vfmaddss3_maskz:
13086 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
13087 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
13088 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
13089 case X86::BI__builtin_ia32_vfmaddss3_mask3:
13090 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
13091 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
13092 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
13093 case X86::BI__builtin_ia32_vfmsubss3_mask3:
13094 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
13095 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
13096 /*NegAcc*/ true);
13097 case X86::BI__builtin_ia32_vfmaddph:
13098 case X86::BI__builtin_ia32_vfmaddps:
13099 case X86::BI__builtin_ia32_vfmaddpd:
13100 case X86::BI__builtin_ia32_vfmaddph256:
13101 case X86::BI__builtin_ia32_vfmaddps256:
13102 case X86::BI__builtin_ia32_vfmaddpd256:
13103 case X86::BI__builtin_ia32_vfmaddph512_mask:
13104 case X86::BI__builtin_ia32_vfmaddph512_maskz:
13105 case X86::BI__builtin_ia32_vfmaddph512_mask3:
13106 case X86::BI__builtin_ia32_vfmaddps512_mask:
13107 case X86::BI__builtin_ia32_vfmaddps512_maskz:
13108 case X86::BI__builtin_ia32_vfmaddps512_mask3:
13109 case X86::BI__builtin_ia32_vfmsubps512_mask3:
13110 case X86::BI__builtin_ia32_vfmaddpd512_mask:
13111 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
13112 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
13113 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
13114 case X86::BI__builtin_ia32_vfmsubph512_mask3:
13115 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
13116 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
13117 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13118 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13119 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13120 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
13121 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13122 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13123 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13124 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13125 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13126 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13127 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13128 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
13129
13130 case X86::BI__builtin_ia32_movdqa32store128_mask:
13131 case X86::BI__builtin_ia32_movdqa64store128_mask:
13132 case X86::BI__builtin_ia32_storeaps128_mask:
13133 case X86::BI__builtin_ia32_storeapd128_mask:
13134 case X86::BI__builtin_ia32_movdqa32store256_mask:
13135 case X86::BI__builtin_ia32_movdqa64store256_mask:
13136 case X86::BI__builtin_ia32_storeaps256_mask:
13137 case X86::BI__builtin_ia32_storeapd256_mask:
13138 case X86::BI__builtin_ia32_movdqa32store512_mask:
13139 case X86::BI__builtin_ia32_movdqa64store512_mask:
13140 case X86::BI__builtin_ia32_storeaps512_mask:
13141 case X86::BI__builtin_ia32_storeapd512_mask:
13142 return EmitX86MaskedStore(
13143 *this, Ops,
13144 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
13145
13146 case X86::BI__builtin_ia32_loadups128_mask:
13147 case X86::BI__builtin_ia32_loadups256_mask:
13148 case X86::BI__builtin_ia32_loadups512_mask:
13149 case X86::BI__builtin_ia32_loadupd128_mask:
13150 case X86::BI__builtin_ia32_loadupd256_mask:
13151 case X86::BI__builtin_ia32_loadupd512_mask:
13152 case X86::BI__builtin_ia32_loaddquqi128_mask:
13153 case X86::BI__builtin_ia32_loaddquqi256_mask:
13154 case X86::BI__builtin_ia32_loaddquqi512_mask:
13155 case X86::BI__builtin_ia32_loaddquhi128_mask:
13156 case X86::BI__builtin_ia32_loaddquhi256_mask:
13157 case X86::BI__builtin_ia32_loaddquhi512_mask:
13158 case X86::BI__builtin_ia32_loaddqusi128_mask:
13159 case X86::BI__builtin_ia32_loaddqusi256_mask:
13160 case X86::BI__builtin_ia32_loaddqusi512_mask:
13161 case X86::BI__builtin_ia32_loaddqudi128_mask:
13162 case X86::BI__builtin_ia32_loaddqudi256_mask:
13163 case X86::BI__builtin_ia32_loaddqudi512_mask:
13164 return EmitX86MaskedLoad(*this, Ops, Align(1));
13165
13166 case X86::BI__builtin_ia32_loadsh128_mask:
13167 case X86::BI__builtin_ia32_loadss128_mask:
13168 case X86::BI__builtin_ia32_loadsd128_mask:
13169 return EmitX86MaskedLoad(*this, Ops, Align(1));
13170
13171 case X86::BI__builtin_ia32_loadaps128_mask:
13172 case X86::BI__builtin_ia32_loadaps256_mask:
13173 case X86::BI__builtin_ia32_loadaps512_mask:
13174 case X86::BI__builtin_ia32_loadapd128_mask:
13175 case X86::BI__builtin_ia32_loadapd256_mask:
13176 case X86::BI__builtin_ia32_loadapd512_mask:
13177 case X86::BI__builtin_ia32_movdqa32load128_mask:
13178 case X86::BI__builtin_ia32_movdqa32load256_mask:
13179 case X86::BI__builtin_ia32_movdqa32load512_mask:
13180 case X86::BI__builtin_ia32_movdqa64load128_mask:
13181 case X86::BI__builtin_ia32_movdqa64load256_mask:
13182 case X86::BI__builtin_ia32_movdqa64load512_mask:
13183 return EmitX86MaskedLoad(
13184 *this, Ops,
13185 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
13186
13187 case X86::BI__builtin_ia32_expandloaddf128_mask:
13188 case X86::BI__builtin_ia32_expandloaddf256_mask:
13189 case X86::BI__builtin_ia32_expandloaddf512_mask:
13190 case X86::BI__builtin_ia32_expandloadsf128_mask:
13191 case X86::BI__builtin_ia32_expandloadsf256_mask:
13192 case X86::BI__builtin_ia32_expandloadsf512_mask:
13193 case X86::BI__builtin_ia32_expandloaddi128_mask:
13194 case X86::BI__builtin_ia32_expandloaddi256_mask:
13195 case X86::BI__builtin_ia32_expandloaddi512_mask:
13196 case X86::BI__builtin_ia32_expandloadsi128_mask:
13197 case X86::BI__builtin_ia32_expandloadsi256_mask:
13198 case X86::BI__builtin_ia32_expandloadsi512_mask:
13199 case X86::BI__builtin_ia32_expandloadhi128_mask:
13200 case X86::BI__builtin_ia32_expandloadhi256_mask:
13201 case X86::BI__builtin_ia32_expandloadhi512_mask:
13202 case X86::BI__builtin_ia32_expandloadqi128_mask:
13203 case X86::BI__builtin_ia32_expandloadqi256_mask:
13204 case X86::BI__builtin_ia32_expandloadqi512_mask:
13205 return EmitX86ExpandLoad(*this, Ops);
13206
13207 case X86::BI__builtin_ia32_compressstoredf128_mask:
13208 case X86::BI__builtin_ia32_compressstoredf256_mask:
13209 case X86::BI__builtin_ia32_compressstoredf512_mask:
13210 case X86::BI__builtin_ia32_compressstoresf128_mask:
13211 case X86::BI__builtin_ia32_compressstoresf256_mask:
13212 case X86::BI__builtin_ia32_compressstoresf512_mask:
13213 case X86::BI__builtin_ia32_compressstoredi128_mask:
13214 case X86::BI__builtin_ia32_compressstoredi256_mask:
13215 case X86::BI__builtin_ia32_compressstoredi512_mask:
13216 case X86::BI__builtin_ia32_compressstoresi128_mask:
13217 case X86::BI__builtin_ia32_compressstoresi256_mask:
13218 case X86::BI__builtin_ia32_compressstoresi512_mask:
13219 case X86::BI__builtin_ia32_compressstorehi128_mask:
13220 case X86::BI__builtin_ia32_compressstorehi256_mask:
13221 case X86::BI__builtin_ia32_compressstorehi512_mask:
13222 case X86::BI__builtin_ia32_compressstoreqi128_mask:
13223 case X86::BI__builtin_ia32_compressstoreqi256_mask:
13224 case X86::BI__builtin_ia32_compressstoreqi512_mask:
13225 return EmitX86CompressStore(*this, Ops);
13226
13227 case X86::BI__builtin_ia32_expanddf128_mask:
13228 case X86::BI__builtin_ia32_expanddf256_mask:
13229 case X86::BI__builtin_ia32_expanddf512_mask:
13230 case X86::BI__builtin_ia32_expandsf128_mask:
13231 case X86::BI__builtin_ia32_expandsf256_mask:
13232 case X86::BI__builtin_ia32_expandsf512_mask:
13233 case X86::BI__builtin_ia32_expanddi128_mask:
13234 case X86::BI__builtin_ia32_expanddi256_mask:
13235 case X86::BI__builtin_ia32_expanddi512_mask:
13236 case X86::BI__builtin_ia32_expandsi128_mask:
13237 case X86::BI__builtin_ia32_expandsi256_mask:
13238 case X86::BI__builtin_ia32_expandsi512_mask:
13239 case X86::BI__builtin_ia32_expandhi128_mask:
13240 case X86::BI__builtin_ia32_expandhi256_mask:
13241 case X86::BI__builtin_ia32_expandhi512_mask:
13242 case X86::BI__builtin_ia32_expandqi128_mask:
13243 case X86::BI__builtin_ia32_expandqi256_mask:
13244 case X86::BI__builtin_ia32_expandqi512_mask:
13245 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
13246
13247 case X86::BI__builtin_ia32_compressdf128_mask:
13248 case X86::BI__builtin_ia32_compressdf256_mask:
13249 case X86::BI__builtin_ia32_compressdf512_mask:
13250 case X86::BI__builtin_ia32_compresssf128_mask:
13251 case X86::BI__builtin_ia32_compresssf256_mask:
13252 case X86::BI__builtin_ia32_compresssf512_mask:
13253 case X86::BI__builtin_ia32_compressdi128_mask:
13254 case X86::BI__builtin_ia32_compressdi256_mask:
13255 case X86::BI__builtin_ia32_compressdi512_mask:
13256 case X86::BI__builtin_ia32_compresssi128_mask:
13257 case X86::BI__builtin_ia32_compresssi256_mask:
13258 case X86::BI__builtin_ia32_compresssi512_mask:
13259 case X86::BI__builtin_ia32_compresshi128_mask:
13260 case X86::BI__builtin_ia32_compresshi256_mask:
13261 case X86::BI__builtin_ia32_compresshi512_mask:
13262 case X86::BI__builtin_ia32_compressqi128_mask:
13263 case X86::BI__builtin_ia32_compressqi256_mask:
13264 case X86::BI__builtin_ia32_compressqi512_mask:
13265 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
13266
13267 case X86::BI__builtin_ia32_gather3div2df:
13268 case X86::BI__builtin_ia32_gather3div2di:
13269 case X86::BI__builtin_ia32_gather3div4df:
13270 case X86::BI__builtin_ia32_gather3div4di:
13271 case X86::BI__builtin_ia32_gather3div4sf:
13272 case X86::BI__builtin_ia32_gather3div4si:
13273 case X86::BI__builtin_ia32_gather3div8sf:
13274 case X86::BI__builtin_ia32_gather3div8si:
13275 case X86::BI__builtin_ia32_gather3siv2df:
13276 case X86::BI__builtin_ia32_gather3siv2di:
13277 case X86::BI__builtin_ia32_gather3siv4df:
13278 case X86::BI__builtin_ia32_gather3siv4di:
13279 case X86::BI__builtin_ia32_gather3siv4sf:
13280 case X86::BI__builtin_ia32_gather3siv4si:
13281 case X86::BI__builtin_ia32_gather3siv8sf:
13282 case X86::BI__builtin_ia32_gather3siv8si:
13283 case X86::BI__builtin_ia32_gathersiv8df:
13284 case X86::BI__builtin_ia32_gathersiv16sf:
13285 case X86::BI__builtin_ia32_gatherdiv8df:
13286 case X86::BI__builtin_ia32_gatherdiv16sf:
13287 case X86::BI__builtin_ia32_gathersiv8di:
13288 case X86::BI__builtin_ia32_gathersiv16si:
13289 case X86::BI__builtin_ia32_gatherdiv8di:
13290 case X86::BI__builtin_ia32_gatherdiv16si: {
13291 Intrinsic::ID IID;
13292 switch (BuiltinID) {
13293 default: llvm_unreachable("Unexpected builtin");
13294 case X86::BI__builtin_ia32_gather3div2df:
13295 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
13296 break;
13297 case X86::BI__builtin_ia32_gather3div2di:
13298 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
13299 break;
13300 case X86::BI__builtin_ia32_gather3div4df:
13301 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
13302 break;
13303 case X86::BI__builtin_ia32_gather3div4di:
13304 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
13305 break;
13306 case X86::BI__builtin_ia32_gather3div4sf:
13307 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
13308 break;
13309 case X86::BI__builtin_ia32_gather3div4si:
13310 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
13311 break;
13312 case X86::BI__builtin_ia32_gather3div8sf:
13313 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
13314 break;
13315 case X86::BI__builtin_ia32_gather3div8si:
13316 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
13317 break;
13318 case X86::BI__builtin_ia32_gather3siv2df:
13319 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
13320 break;
13321 case X86::BI__builtin_ia32_gather3siv2di:
13322 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
13323 break;
13324 case X86::BI__builtin_ia32_gather3siv4df:
13325 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
13326 break;
13327 case X86::BI__builtin_ia32_gather3siv4di:
13328 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
13329 break;
13330 case X86::BI__builtin_ia32_gather3siv4sf:
13331 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
13332 break;
13333 case X86::BI__builtin_ia32_gather3siv4si:
13334 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
13335 break;
13336 case X86::BI__builtin_ia32_gather3siv8sf:
13337 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
13338 break;
13339 case X86::BI__builtin_ia32_gather3siv8si:
13340 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
13341 break;
13342 case X86::BI__builtin_ia32_gathersiv8df:
13343 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
13344 break;
13345 case X86::BI__builtin_ia32_gathersiv16sf:
13346 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
13347 break;
13348 case X86::BI__builtin_ia32_gatherdiv8df:
13349 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
13350 break;
13351 case X86::BI__builtin_ia32_gatherdiv16sf:
13352 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
13353 break;
13354 case X86::BI__builtin_ia32_gathersiv8di:
13355 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
13356 break;
13357 case X86::BI__builtin_ia32_gathersiv16si:
13358 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
13359 break;
13360 case X86::BI__builtin_ia32_gatherdiv8di:
13361 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
13362 break;
13363 case X86::BI__builtin_ia32_gatherdiv16si:
13364 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
13365 break;
13366 }
13367
13368 unsigned MinElts = std::min(
13369 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
13370 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
13371 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
13372 Function *Intr = CGM.getIntrinsic(IID);
13373 return Builder.CreateCall(Intr, Ops);
13374 }
13375
13376 case X86::BI__builtin_ia32_scattersiv8df:
13377 case X86::BI__builtin_ia32_scattersiv16sf:
13378 case X86::BI__builtin_ia32_scatterdiv8df:
13379 case X86::BI__builtin_ia32_scatterdiv16sf:
13380 case X86::BI__builtin_ia32_scattersiv8di:
13381 case X86::BI__builtin_ia32_scattersiv16si:
13382 case X86::BI__builtin_ia32_scatterdiv8di:
13383 case X86::BI__builtin_ia32_scatterdiv16si:
13384 case X86::BI__builtin_ia32_scatterdiv2df:
13385 case X86::BI__builtin_ia32_scatterdiv2di:
13386 case X86::BI__builtin_ia32_scatterdiv4df:
13387 case X86::BI__builtin_ia32_scatterdiv4di:
13388 case X86::BI__builtin_ia32_scatterdiv4sf:
13389 case X86::BI__builtin_ia32_scatterdiv4si:
13390 case X86::BI__builtin_ia32_scatterdiv8sf:
13391 case X86::BI__builtin_ia32_scatterdiv8si:
13392 case X86::BI__builtin_ia32_scattersiv2df:
13393 case X86::BI__builtin_ia32_scattersiv2di:
13394 case X86::BI__builtin_ia32_scattersiv4df:
13395 case X86::BI__builtin_ia32_scattersiv4di:
13396 case X86::BI__builtin_ia32_scattersiv4sf:
13397 case X86::BI__builtin_ia32_scattersiv4si:
13398 case X86::BI__builtin_ia32_scattersiv8sf:
13399 case X86::BI__builtin_ia32_scattersiv8si: {
13400 Intrinsic::ID IID;
13401 switch (BuiltinID) {
13402 default: llvm_unreachable("Unexpected builtin");
13403 case X86::BI__builtin_ia32_scattersiv8df:
13404 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
13405 break;
13406 case X86::BI__builtin_ia32_scattersiv16sf:
13407 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
13408 break;
13409 case X86::BI__builtin_ia32_scatterdiv8df:
13410 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
13411 break;
13412 case X86::BI__builtin_ia32_scatterdiv16sf:
13413 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
13414 break;
13415 case X86::BI__builtin_ia32_scattersiv8di:
13416 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
13417 break;
13418 case X86::BI__builtin_ia32_scattersiv16si:
13419 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
13420 break;
13421 case X86::BI__builtin_ia32_scatterdiv8di:
13422 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
13423 break;
13424 case X86::BI__builtin_ia32_scatterdiv16si:
13425 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
13426 break;
13427 case X86::BI__builtin_ia32_scatterdiv2df:
13428 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
13429 break;
13430 case X86::BI__builtin_ia32_scatterdiv2di:
13431 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
13432 break;
13433 case X86::BI__builtin_ia32_scatterdiv4df:
13434 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
13435 break;
13436 case X86::BI__builtin_ia32_scatterdiv4di:
13437 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
13438 break;
13439 case X86::BI__builtin_ia32_scatterdiv4sf:
13440 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
13441 break;
13442 case X86::BI__builtin_ia32_scatterdiv4si:
13443 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
13444 break;
13445 case X86::BI__builtin_ia32_scatterdiv8sf:
13446 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
13447 break;
13448 case X86::BI__builtin_ia32_scatterdiv8si:
13449 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
13450 break;
13451 case X86::BI__builtin_ia32_scattersiv2df:
13452 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
13453 break;
13454 case X86::BI__builtin_ia32_scattersiv2di:
13455 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
13456 break;
13457 case X86::BI__builtin_ia32_scattersiv4df:
13458 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
13459 break;
13460 case X86::BI__builtin_ia32_scattersiv4di:
13461 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
13462 break;
13463 case X86::BI__builtin_ia32_scattersiv4sf:
13464 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
13465 break;
13466 case X86::BI__builtin_ia32_scattersiv4si:
13467 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
13468 break;
13469 case X86::BI__builtin_ia32_scattersiv8sf:
13470 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
13471 break;
13472 case X86::BI__builtin_ia32_scattersiv8si:
13473 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
13474 break;
13475 }
13476
13477 unsigned MinElts = std::min(
13478 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
13479 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
13480 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
13481 Function *Intr = CGM.getIntrinsic(IID);
13482 return Builder.CreateCall(Intr, Ops);
13483 }
13484
13485 case X86::BI__builtin_ia32_vextractf128_pd256:
13486 case X86::BI__builtin_ia32_vextractf128_ps256:
13487 case X86::BI__builtin_ia32_vextractf128_si256:
13488 case X86::BI__builtin_ia32_extract128i256:
13489 case X86::BI__builtin_ia32_extractf64x4_mask:
13490 case X86::BI__builtin_ia32_extractf32x4_mask:
13491 case X86::BI__builtin_ia32_extracti64x4_mask:
13492 case X86::BI__builtin_ia32_extracti32x4_mask:
13493 case X86::BI__builtin_ia32_extractf32x8_mask:
13494 case X86::BI__builtin_ia32_extracti32x8_mask:
13495 case X86::BI__builtin_ia32_extractf32x4_256_mask:
13496 case X86::BI__builtin_ia32_extracti32x4_256_mask:
13497 case X86::BI__builtin_ia32_extractf64x2_256_mask:
13498 case X86::BI__builtin_ia32_extracti64x2_256_mask:
13499 case X86::BI__builtin_ia32_extractf64x2_512_mask:
13500 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
13501 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
13502 unsigned NumElts = DstTy->getNumElements();
13503 unsigned SrcNumElts =
13504 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13505 unsigned SubVectors = SrcNumElts / NumElts;
13506 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
13507 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
13508 Index &= SubVectors - 1; // Remove any extra bits.
13509 Index *= NumElts;
13510
13511 int Indices[16];
13512 for (unsigned i = 0; i != NumElts; ++i)
13513 Indices[i] = i + Index;
13514
13515 Value *Res = Builder.CreateShuffleVector(Ops[0],
13516 makeArrayRef(Indices, NumElts),
13517 "extract");
13518
13519 if (Ops.size() == 4)
13520 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
13521
13522 return Res;
13523 }
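//===-- Editorial sketch (illustrative, not part of CGBuiltin.cpp) -------===//
// The extract builtins become a plain shufflevector: the immediate picks a
// subvector, and the indices are NumElts consecutive source positions.
// Model of the index computation above (hypothetical name):
static void extract_indices_model(int Indices[], unsigned NumElts,
                                  unsigned SrcNumElts, unsigned Imm) {
  unsigned SubVectors = SrcNumElts / NumElts;         // power of 2 by assert
  unsigned Index = (Imm & (SubVectors - 1)) * NumElts;
  for (unsigned I = 0; I != NumElts; ++I)
    Indices[I] = int(Index + I);
  // e.g. _mm512_extractf32x4_ps with Imm = 2: Indices = {8, 9, 10, 11}.
}
//===----------------------------------------------------------------------===//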
13524 case X86::BI__builtin_ia32_vinsertf128_pd256:
13525 case X86::BI__builtin_ia32_vinsertf128_ps256:
13526 case X86::BI__builtin_ia32_vinsertf128_si256:
13527 case X86::BI__builtin_ia32_insert128i256:
13528 case X86::BI__builtin_ia32_insertf64x4:
13529 case X86::BI__builtin_ia32_insertf32x4:
13530 case X86::BI__builtin_ia32_inserti64x4:
13531 case X86::BI__builtin_ia32_inserti32x4:
13532 case X86::BI__builtin_ia32_insertf32x8:
13533 case X86::BI__builtin_ia32_inserti32x8:
13534 case X86::BI__builtin_ia32_insertf32x4_256:
13535 case X86::BI__builtin_ia32_inserti32x4_256:
13536 case X86::BI__builtin_ia32_insertf64x2_256:
13537 case X86::BI__builtin_ia32_inserti64x2_256:
13538 case X86::BI__builtin_ia32_insertf64x2_512:
13539 case X86::BI__builtin_ia32_inserti64x2_512: {
13540 unsigned DstNumElts =
13541 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13542 unsigned SrcNumElts =
13543 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
13544 unsigned SubVectors = DstNumElts / SrcNumElts;
13545 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
13546 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
13547 Index &= SubVectors - 1; // Remove any extra bits.
13548 Index *= SrcNumElts;
13549
13550 int Indices[16];
13551 for (unsigned i = 0; i != DstNumElts; ++i)
13552 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
13553
13554 Value *Op1 = Builder.CreateShuffleVector(Ops[1],
13555 makeArrayRef(Indices, DstNumElts),
13556 "widen");
13557
13558 for (unsigned i = 0; i != DstNumElts; ++i) {
13559 if (i >= Index && i < (Index + SrcNumElts))
13560 Indices[i] = (i - Index) + DstNumElts;
13561 else
13562 Indices[i] = i;
13563 }
13564
13565 return Builder.CreateShuffleVector(Ops[0], Op1,
13566 makeArrayRef(Indices, DstNumElts),
13567 "insert");
13568 }
13569 case X86::BI__builtin_ia32_pmovqd512_mask:
13570 case X86::BI__builtin_ia32_pmovwb512_mask: {
13571 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
13572 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
13573 }
13574 case X86::BI__builtin_ia32_pmovdb512_mask:
13575 case X86::BI__builtin_ia32_pmovdw512_mask:
13576 case X86::BI__builtin_ia32_pmovqw512_mask: {
13577 if (const auto *C = dyn_cast<Constant>(Ops[2]))
13578 if (C->isAllOnesValue())
13579 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
13580
13581 Intrinsic::ID IID;
13582 switch (BuiltinID) {
13583 default: llvm_unreachable("Unsupported intrinsic!");
13584 case X86::BI__builtin_ia32_pmovdb512_mask:
13585 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
13586 break;
13587 case X86::BI__builtin_ia32_pmovdw512_mask:
13588 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
13589 break;
13590 case X86::BI__builtin_ia32_pmovqw512_mask:
13591 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
13592 break;
13593 }
13594
13595 Function *Intr = CGM.getIntrinsic(IID);
13596 return Builder.CreateCall(Intr, Ops);
13597 }
13598 case X86::BI__builtin_ia32_pblendw128:
13599 case X86::BI__builtin_ia32_blendpd:
13600 case X86::BI__builtin_ia32_blendps:
13601 case X86::BI__builtin_ia32_blendpd256:
13602 case X86::BI__builtin_ia32_blendps256:
13603 case X86::BI__builtin_ia32_pblendw256:
13604 case X86::BI__builtin_ia32_pblendd128:
13605 case X86::BI__builtin_ia32_pblendd256: {
13606 unsigned NumElts =
13607 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13608 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
13609
13610 int Indices[16];
13611 // If there are more than 8 elements, the immediate is used twice, so make
13612 // sure we handle that.
13613 for (unsigned i = 0; i != NumElts; ++i)
13614 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
13615
13616 return Builder.CreateShuffleVector(Ops[0], Ops[1],
13617 makeArrayRef(Indices, NumElts),
13618 "blend");
13619 }
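//===-- Editorial sketch (illustrative, not part of CGBuiltin.cpp) -------===//
// In shufflevector indices, values >= NumElts select from the second
// operand, so bit i of the blend immediate routes lane i to Ops[1].
// Worked example, assuming _mm_blend_ps(a, b, 0b0101) with NumElts = 4:
//   i=0: bit 1 -> index 4 (b[0])    i=1: bit 0 -> index 1 (a[1])
//   i=2: bit 1 -> index 6 (b[2])    i=3: bit 0 -> index 3 (a[3])
// giving Indices = {4, 1, 6, 3}, i.e. result = {b0, a1, b2, a3}.
//===----------------------------------------------------------------------===//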
13620 case X86::BI__builtin_ia32_pshuflw:
13621 case X86::BI__builtin_ia32_pshuflw256:
13622 case X86::BI__builtin_ia32_pshuflw512: {
13623 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
13624 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
13625 unsigned NumElts = Ty->getNumElements();
13626
13627 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
13628 Imm = (Imm & 0xff) * 0x01010101;
13629
13630 int Indices[32];
13631 for (unsigned l = 0; l != NumElts; l += 8) {
13632 for (unsigned i = 0; i != 4; ++i) {
13633 Indices[l + i] = l + (Imm & 3);
13634 Imm >>= 2;
13635 }
13636 for (unsigned i = 4; i != 8; ++i)
13637 Indices[l + i] = l + i;
13638 }
13639
13640 return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
13641 "pshuflw");
13642 }
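//===-- Editorial sketch (illustrative, not part of CGBuiltin.cpp) -------===//
// The `(Imm & 0xff) * 0x01010101` trick replicates the immediate byte into
// all four bytes, so the inner loop can keep doing `Imm >>= 2` across every
// 8-element lane without reloading the selector. Quick check:
#include <cassert>
#include <cstdint>
int main() {
  uint32_t Imm = 0xB1;
  Imm = (Imm & 0xff) * 0x01010101;
  assert(Imm == 0xB1B1B1B1); // byte splatted; 2-bit fields now wrap cleanly
}
//===----------------------------------------------------------------------===//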
13643 case X86::BI__builtin_ia32_pshufhw:
13644 case X86::BI__builtin_ia32_pshufhw256:
13645 case X86::BI__builtin_ia32_pshufhw512: {
13646 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
13647 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
13648 unsigned NumElts = Ty->getNumElements();
13649
13650 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
13651 Imm = (Imm & 0xff) * 0x01010101;
13652
13653 int Indices[32];
13654 for (unsigned l = 0; l != NumElts; l += 8) {
13655 for (unsigned i = 0; i != 4; ++i)
13656 Indices[l + i] = l + i;
13657 for (unsigned i = 4; i != 8; ++i) {
13658 Indices[l + i] = l + 4 + (Imm & 3);
13659 Imm >>= 2;
13660 }
13661 }
13662
13663 return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
13664 "pshufhw");
13665 }
13666 case X86::BI__builtin_ia32_pshufd:
13667 case X86::BI__builtin_ia32_pshufd256:
13668 case X86::BI__builtin_ia32_pshufd512:
13669 case X86::BI__builtin_ia32_vpermilpd:
13670 case X86::BI__builtin_ia32_vpermilps:
13671 case X86::BI__builtin_ia32_vpermilpd256:
13672 case X86::BI__builtin_ia32_vpermilps256:
13673 case X86::BI__builtin_ia32_vpermilpd512:
13674 case X86::BI__builtin_ia32_vpermilps512: {
13675 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
13676 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
13677 unsigned NumElts = Ty->getNumElements();
13678 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
13679 unsigned NumLaneElts = NumElts / NumLanes;
13680
13681 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
13682 Imm = (Imm & 0xff) * 0x01010101;
13683
13684 int Indices[16];
13685 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
13686 for (unsigned i = 0; i != NumLaneElts; ++i) {
13687 Indices[i + l] = (Imm % NumLaneElts) + l;
13688 Imm /= NumLaneElts;
13689 }
13690 }
13691
13692 return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
13693 "permil");
13694 }
13695 case X86::BI__builtin_ia32_shufpd:
13696 case X86::BI__builtin_ia32_shufpd256:
13697 case X86::BI__builtin_ia32_shufpd512:
13698 case X86::BI__builtin_ia32_shufps:
13699 case X86::BI__builtin_ia32_shufps256:
13700 case X86::BI__builtin_ia32_shufps512: {
13701 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
13702 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
13703 unsigned NumElts = Ty->getNumElements();
13704 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
13705 unsigned NumLaneElts = NumElts / NumLanes;
13706
13707 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
13708 Imm = (Imm & 0xff) * 0x01010101;
13709
13710 int Indices[16];
13711 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
13712 for (unsigned i = 0; i != NumLaneElts; ++i) {
13713 unsigned Index = Imm % NumLaneElts;
13714 Imm /= NumLaneElts;
13715 if (i >= (NumLaneElts / 2))
13716 Index += NumElts;
13717 Indices[l + i] = l + Index;
13718 }
13719 }
13720
13721 return Builder.CreateShuffleVector(Ops[0], Ops[1],
13722 makeArrayRef(Indices, NumElts),
13723 "shufp");
13724 }
13725 case X86::BI__builtin_ia32_permdi256:
13726 case X86::BI__builtin_ia32_permdf256:
13727 case X86::BI__builtin_ia32_permdi512:
13728 case X86::BI__builtin_ia32_permdf512: {
13729 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
13730 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
13731 unsigned NumElts = Ty->getNumElements();
13732
13733 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
13734 int Indices[8];
13735 for (unsigned l = 0; l != NumElts; l += 4)
13736 for (unsigned i = 0; i != 4; ++i)
13737 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
13738
13739 return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
13740 "perm");
13741 }
13742 case X86::BI__builtin_ia32_palignr128:
13743 case X86::BI__builtin_ia32_palignr256:
13744 case X86::BI__builtin_ia32_palignr512: {
13745 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
13746
13747 unsigned NumElts =
13748 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13749 assert(NumElts % 16 == 0);
13750
13751 // If palignr is shifting the pair of vectors more than the size of two
13752 // lanes, emit zero.
13753 if (ShiftVal >= 32)
13754 return llvm::Constant::getNullValue(ConvertType(E->getType()));
13755
13756 // If palignr is shifting the pair of input vectors more than one lane,
13757 // but less than two lanes, convert to shifting in zeroes.
13758 if (ShiftVal > 16) {
13759 ShiftVal -= 16;
13760 Ops[1] = Ops[0];
13761 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
13762 }
13763
13764 int Indices[64];
13765 // 256-bit palignr operates on 128-bit lanes so we need to handle that
13766 for (unsigned l = 0; l != NumElts; l += 16) {
13767 for (unsigned i = 0; i != 16; ++i) {
13768 unsigned Idx = ShiftVal + i;
13769 if (Idx >= 16)
13770 Idx += NumElts - 16; // End of lane, switch operand.
13771 Indices[l + i] = Idx + l;
13772 }
13773 }
13774
13775 return Builder.CreateShuffleVector(Ops[1], Ops[0],
13776 makeArrayRef(Indices, NumElts),
13777 "palignr");
13778 }
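    // For example, a 128-bit palignr with ShiftVal = 4 yields
    // Indices = {4, ..., 15, 16, 17, 18, 19}: bytes 4-15 of the second
    // operand followed by bytes 0-3 of the first, i.e. the 32-byte
    // concatenation shifted right by four bytes.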
13779 case X86::BI__builtin_ia32_alignd128:
13780 case X86::BI__builtin_ia32_alignd256:
13781 case X86::BI__builtin_ia32_alignd512:
13782 case X86::BI__builtin_ia32_alignq128:
13783 case X86::BI__builtin_ia32_alignq256:
13784 case X86::BI__builtin_ia32_alignq512: {
13785 unsigned NumElts =
13786 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13787 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
13788
13789 // Mask the shift amount to the width of a vector.
13790 ShiftVal &= NumElts - 1;
13791
13792 int Indices[16];
13793 for (unsigned i = 0; i != NumElts; ++i)
13794 Indices[i] = i + ShiftVal;
13795
13796 return Builder.CreateShuffleVector(Ops[1], Ops[0],
13797 makeArrayRef(Indices, NumElts),
13798 "valign");
13799 }
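    // For example, __builtin_ia32_alignd128 with ShiftVal = 1 yields
    // Indices = {1, 2, 3, 4}: elements 1-3 of the second operand followed by
    // element 0 of the first.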
13800 case X86::BI__builtin_ia32_shuf_f32x4_256:
13801 case X86::BI__builtin_ia32_shuf_f64x2_256:
13802 case X86::BI__builtin_ia32_shuf_i32x4_256:
13803 case X86::BI__builtin_ia32_shuf_i64x2_256:
13804 case X86::BI__builtin_ia32_shuf_f32x4:
13805 case X86::BI__builtin_ia32_shuf_f64x2:
13806 case X86::BI__builtin_ia32_shuf_i32x4:
13807 case X86::BI__builtin_ia32_shuf_i64x2: {
13808 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
13809 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
13810 unsigned NumElts = Ty->getNumElements();
13811 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
13812 unsigned NumLaneElts = NumElts / NumLanes;
13813
13814 int Indices[16];
13815 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
13816 unsigned Index = (Imm % NumLanes) * NumLaneElts;
13817 Imm /= NumLanes; // Discard the bits we just used.
13818 if (l >= (NumElts / 2))
13819 Index += NumElts; // Switch to other source.
13820 for (unsigned i = 0; i != NumLaneElts; ++i) {
13821 Indices[l + i] = Index + i;
13822 }
13823 }
13824
13825 return Builder.CreateShuffleVector(Ops[0], Ops[1],
13826 makeArrayRef(Indices, NumElts),
13827 "shuf");
13828 }
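    // For example, __builtin_ia32_shuf_f64x2_256 with Imm = 1 yields
    // Indices = {2, 3, 4, 5}: the high 128-bit lane of the first operand
    // followed by the low lane of the second.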
13829
13830 case X86::BI__builtin_ia32_vperm2f128_pd256:
13831 case X86::BI__builtin_ia32_vperm2f128_ps256:
13832 case X86::BI__builtin_ia32_vperm2f128_si256:
13833 case X86::BI__builtin_ia32_permti256: {
13834 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
13835 unsigned NumElts =
13836 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13837
13838 // This takes a very simple approach: since there are two lanes and a
13839 // shuffle can have two inputs, we reserve the first input for the first
13840 // lane and the second input for the second lane. This may result in
13841 // duplicate sources, but the backend can deal with that.
13842
13843 Value *OutOps[2];
13844 int Indices[8];
13845 for (unsigned l = 0; l != 2; ++l) {
13846 // Determine the source for this lane.
13847 if (Imm & (1 << ((l * 4) + 3)))
13848 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
13849 else if (Imm & (1 << ((l * 4) + 1)))
13850 OutOps[l] = Ops[1];
13851 else
13852 OutOps[l] = Ops[0];
13853
13854 for (unsigned i = 0; i != NumElts/2; ++i) {
13855 // Start with ith element of the source for this lane.
13856 unsigned Idx = (l * NumElts) + i;
13857 // If bit 0 of the immediate half is set, switch to the high half of
13858 // the source.
13859 if (Imm & (1 << (l * 4)))
13860 Idx += NumElts/2;
13861 Indices[(l * (NumElts/2)) + i] = Idx;
13862 }
13863 }
13864
13865 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
13866 makeArrayRef(Indices, NumElts),
13867 "vperm");
13868 }
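    // For example, with Imm = 0x21 on <8 x float> this picks
    // OutOps = {Ops[0], Ops[1]} and Indices = {4, 5, 6, 7, 8, 9, 10, 11}:
    // the high half of the first source followed by the low half of the
    // second.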
13869
13870 case X86::BI__builtin_ia32_pslldqi128_byteshift:
13871 case X86::BI__builtin_ia32_pslldqi256_byteshift:
13872 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
13873 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
13874 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
13875 // Builtin type is vXi64 so multiply by 8 to get bytes.
13876 unsigned NumElts = ResultType->getNumElements() * 8;
13877
13878 // If pslldq is shifting the vector more than 15 bytes, emit zero.
13879 if (ShiftVal >= 16)
13880 return llvm::Constant::getNullValue(ResultType);
13881
13882 int Indices[64];
13883 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
13884 for (unsigned l = 0; l != NumElts; l += 16) {
13885 for (unsigned i = 0; i != 16; ++i) {
13886 unsigned Idx = NumElts + i - ShiftVal;
13887 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
13888 Indices[l + i] = Idx + l;
13889 }
13890 }
13891
13892 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
13893 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
13894 Value *Zero = llvm::Constant::getNullValue(VecTy);
13895 Value *SV = Builder.CreateShuffleVector(Zero, Cast,
13896 makeArrayRef(Indices, NumElts),
13897 "pslldq");
13898 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
13899 }
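    // For example, a 128-bit pslldq with ShiftVal = 4 yields
    // Indices = {12, 13, 14, 15, 16, ..., 27}: four bytes of the zero vector
    // followed by bytes 0-11 of the input, i.e. a byte-wise left shift by
    // four with zero fill.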
13900 case X86::BI__builtin_ia32_psrldqi128_byteshift:
13901 case X86::BI__builtin_ia32_psrldqi256_byteshift:
13902 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
13903 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
13904 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
13905 // Builtin type is vXi64 so multiply by 8 to get bytes.
13906 unsigned NumElts = ResultType->getNumElements() * 8;
13907
13908 // If psrldq is shifting the vector more than 15 bytes, emit zero.
13909 if (ShiftVal >= 16)
13910 return llvm::Constant::getNullValue(ResultType);
13911
13912 int Indices[64];
13913 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
13914 for (unsigned l = 0; l != NumElts; l += 16) {
13915 for (unsigned i = 0; i != 16; ++i) {
13916 unsigned Idx = i + ShiftVal;
13917 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
13918 Indices[l + i] = Idx + l;
13919 }
13920 }
13921
13922 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
13923 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
13924 Value *Zero = llvm::Constant::getNullValue(VecTy);
13925 Value *SV = Builder.CreateShuffleVector(Cast, Zero,
13926 makeArrayRef(Indices, NumElts),
13927 "psrldq");
13928 return Builder.CreateBitCast(SV, ResultType, "cast");
13929 }
13930 case X86::BI__builtin_ia32_kshiftliqi:
13931 case X86::BI__builtin_ia32_kshiftlihi:
13932 case X86::BI__builtin_ia32_kshiftlisi:
13933 case X86::BI__builtin_ia32_kshiftlidi: {
13934 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
13935 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
13936
13937 if (ShiftVal >= NumElts)
13938 return llvm::Constant::getNullValue(Ops[0]->getType());
13939
13940 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
13941
13942 int Indices[64];
13943 for (unsigned i = 0; i != NumElts; ++i)
13944 Indices[i] = NumElts + i - ShiftVal;
13945
13946 Value *Zero = llvm::Constant::getNullValue(In->getType());
13947 Value *SV = Builder.CreateShuffleVector(Zero, In,
13948 makeArrayRef(Indices, NumElts),
13949 "kshiftl");
13950 return Builder.CreateBitCast(SV, Ops[0]->getType());
13951 }
13952 case X86::BI__builtin_ia32_kshiftriqi:
13953 case X86::BI__builtin_ia32_kshiftrihi:
13954 case X86::BI__builtin_ia32_kshiftrisi:
13955 case X86::BI__builtin_ia32_kshiftridi: {
13956 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
13957 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
13958
13959 if (ShiftVal >= NumElts)
13960 return llvm::Constant::getNullValue(Ops[0]->getType());
13961
13962 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
13963
13964 int Indices[64];
13965 for (unsigned i = 0; i != NumElts; ++i)
13966 Indices[i] = i + ShiftVal;
13967
13968 Value *Zero = llvm::Constant::getNullValue(In->getType());
13969 Value *SV = Builder.CreateShuffleVector(In, Zero,
13970 makeArrayRef(Indices, NumElts),
13971 "kshiftr");
13972 return Builder.CreateBitCast(SV, Ops[0]->getType());
13973 }
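    // Worked example: for kshiftlihi with ShiftVal = 2 the left-shift case
    // above computes Indices[i] = 14 + i, so result[i] is zero for i < 2 and
    // In[i - 2] otherwise - a left shift of the 16-bit mask by two; the
    // right-shift case here is the mirror image, reading In[i + ShiftVal].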
13974 case X86::BI__builtin_ia32_movnti:
13975 case X86::BI__builtin_ia32_movnti64:
13976 case X86::BI__builtin_ia32_movntsd:
13977 case X86::BI__builtin_ia32_movntss: {
13978 llvm::MDNode *Node = llvm::MDNode::get(
13979 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
13980
13981 Value *Ptr = Ops[0];
13982 Value *Src = Ops[1];
13983
13984 // Extract the 0'th element of the source vector.
13985 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
13986 BuiltinID == X86::BI__builtin_ia32_movntss)
13987 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
13988
13989 // Convert the type of the pointer to a pointer to the stored type.
13990 Value *BC = Builder.CreateBitCast(
13991 Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
13992
13993 // Unaligned nontemporal store of the scalar value.
13994 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
13995 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
13996 SI->setAlignment(llvm::Align(1));
13997 return SI;
13998 }
13999 // Rotate is a special case of funnel shift - the first two args are the same.
14000 case X86::BI__builtin_ia32_vprotb:
14001 case X86::BI__builtin_ia32_vprotw:
14002 case X86::BI__builtin_ia32_vprotd:
14003 case X86::BI__builtin_ia32_vprotq:
14004 case X86::BI__builtin_ia32_vprotbi:
14005 case X86::BI__builtin_ia32_vprotwi:
14006 case X86::BI__builtin_ia32_vprotdi:
14007 case X86::BI__builtin_ia32_vprotqi:
14008 case X86::BI__builtin_ia32_prold128:
14009 case X86::BI__builtin_ia32_prold256:
14010 case X86::BI__builtin_ia32_prold512:
14011 case X86::BI__builtin_ia32_prolq128:
14012 case X86::BI__builtin_ia32_prolq256:
14013 case X86::BI__builtin_ia32_prolq512:
14014 case X86::BI__builtin_ia32_prolvd128:
14015 case X86::BI__builtin_ia32_prolvd256:
14016 case X86::BI__builtin_ia32_prolvd512:
14017 case X86::BI__builtin_ia32_prolvq128:
14018 case X86::BI__builtin_ia32_prolvq256:
14019 case X86::BI__builtin_ia32_prolvq512:
14020 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
14021 case X86::BI__builtin_ia32_prord128:
14022 case X86::BI__builtin_ia32_prord256:
14023 case X86::BI__builtin_ia32_prord512:
14024 case X86::BI__builtin_ia32_prorq128:
14025 case X86::BI__builtin_ia32_prorq256:
14026 case X86::BI__builtin_ia32_prorq512:
14027 case X86::BI__builtin_ia32_prorvd128:
14028 case X86::BI__builtin_ia32_prorvd256:
14029 case X86::BI__builtin_ia32_prorvd512:
14030 case X86::BI__builtin_ia32_prorvq128:
14031 case X86::BI__builtin_ia32_prorvq256:
14032 case X86::BI__builtin_ia32_prorvq512:
14033 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
14034 case X86::BI__builtin_ia32_selectb_128:
14035 case X86::BI__builtin_ia32_selectb_256:
14036 case X86::BI__builtin_ia32_selectb_512:
14037 case X86::BI__builtin_ia32_selectw_128:
14038 case X86::BI__builtin_ia32_selectw_256:
14039 case X86::BI__builtin_ia32_selectw_512:
14040 case X86::BI__builtin_ia32_selectd_128:
14041 case X86::BI__builtin_ia32_selectd_256:
14042 case X86::BI__builtin_ia32_selectd_512:
14043 case X86::BI__builtin_ia32_selectq_128:
14044 case X86::BI__builtin_ia32_selectq_256:
14045 case X86::BI__builtin_ia32_selectq_512:
14046 case X86::BI__builtin_ia32_selectph_128:
14047 case X86::BI__builtin_ia32_selectph_256:
14048 case X86::BI__builtin_ia32_selectph_512:
14049 case X86::BI__builtin_ia32_selectps_128:
14050 case X86::BI__builtin_ia32_selectps_256:
14051 case X86::BI__builtin_ia32_selectps_512:
14052 case X86::BI__builtin_ia32_selectpd_128:
14053 case X86::BI__builtin_ia32_selectpd_256:
14054 case X86::BI__builtin_ia32_selectpd_512:
14055 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
14056 case X86::BI__builtin_ia32_selectsh_128:
14057 case X86::BI__builtin_ia32_selectss_128:
14058 case X86::BI__builtin_ia32_selectsd_128: {
14059 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14060 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14061 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
14062 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
14063 }
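    // This extracts element 0 of Ops[1] and Ops[2], selects between the two
    // scalars based on the low bit of the mask in Ops[0], and reinserts the
    // chosen value into element 0 of Ops[1], leaving its upper elements
    // intact.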
14064 case X86::BI__builtin_ia32_cmpb128_mask:
14065 case X86::BI__builtin_ia32_cmpb256_mask:
14066 case X86::BI__builtin_ia32_cmpb512_mask:
14067 case X86::BI__builtin_ia32_cmpw128_mask:
14068 case X86::BI__builtin_ia32_cmpw256_mask:
14069 case X86::BI__builtin_ia32_cmpw512_mask:
14070 case X86::BI__builtin_ia32_cmpd128_mask:
14071 case X86::BI__builtin_ia32_cmpd256_mask:
14072 case X86::BI__builtin_ia32_cmpd512_mask:
14073 case X86::BI__builtin_ia32_cmpq128_mask:
14074 case X86::BI__builtin_ia32_cmpq256_mask:
14075 case X86::BI__builtin_ia32_cmpq512_mask: {
14076 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
14077 return EmitX86MaskedCompare(*this, CC, true, Ops);
14078 }
14079 case X86::BI__builtin_ia32_ucmpb128_mask:
14080 case X86::BI__builtin_ia32_ucmpb256_mask:
14081 case X86::BI__builtin_ia32_ucmpb512_mask:
14082 case X86::BI__builtin_ia32_ucmpw128_mask:
14083 case X86::BI__builtin_ia32_ucmpw256_mask:
14084 case X86::BI__builtin_ia32_ucmpw512_mask:
14085 case X86::BI__builtin_ia32_ucmpd128_mask:
14086 case X86::BI__builtin_ia32_ucmpd256_mask:
14087 case X86::BI__builtin_ia32_ucmpd512_mask:
14088 case X86::BI__builtin_ia32_ucmpq128_mask:
14089 case X86::BI__builtin_ia32_ucmpq256_mask:
14090 case X86::BI__builtin_ia32_ucmpq512_mask: {
14091 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
14092 return EmitX86MaskedCompare(*this, CC, false, Ops);
14093 }
14094 case X86::BI__builtin_ia32_vpcomb:
14095 case X86::BI__builtin_ia32_vpcomw:
14096 case X86::BI__builtin_ia32_vpcomd:
14097 case X86::BI__builtin_ia32_vpcomq:
14098 return EmitX86vpcom(*this, Ops, true);
14099 case X86::BI__builtin_ia32_vpcomub:
14100 case X86::BI__builtin_ia32_vpcomuw:
14101 case X86::BI__builtin_ia32_vpcomud:
14102 case X86::BI__builtin_ia32_vpcomuq:
14103 return EmitX86vpcom(*this, Ops, false);
14104
14105 case X86::BI__builtin_ia32_kortestcqi:
14106 case X86::BI__builtin_ia32_kortestchi:
14107 case X86::BI__builtin_ia32_kortestcsi:
14108 case X86::BI__builtin_ia32_kortestcdi: {
14109 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
14110 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
14111 Value *Cmp = Builder.CreateICmpEQ(Or, C);
14112 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
14113 }
14114 case X86::BI__builtin_ia32_kortestzqi:
14115 case X86::BI__builtin_ia32_kortestzhi:
14116 case X86::BI__builtin_ia32_kortestzsi:
14117 case X86::BI__builtin_ia32_kortestzdi: {
14118 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
14119 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
14120 Value *Cmp = Builder.CreateICmpEQ(Or, C);
14121 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
14122 }
14123
14124 case X86::BI__builtin_ia32_ktestcqi:
14125 case X86::BI__builtin_ia32_ktestzqi:
14126 case X86::BI__builtin_ia32_ktestchi:
14127 case X86::BI__builtin_ia32_ktestzhi:
14128 case X86::BI__builtin_ia32_ktestcsi:
14129 case X86::BI__builtin_ia32_ktestzsi:
14130 case X86::BI__builtin_ia32_ktestcdi:
14131 case X86::BI__builtin_ia32_ktestzdi: {
14132 Intrinsic::ID IID;
14133 switch (BuiltinID) {
14134 default: llvm_unreachable("Unsupported intrinsic!");
14135 case X86::BI__builtin_ia32_ktestcqi:
14136 IID = Intrinsic::x86_avx512_ktestc_b;
14137 break;
14138 case X86::BI__builtin_ia32_ktestzqi:
14139 IID = Intrinsic::x86_avx512_ktestz_b;
14140 break;
14141 case X86::BI__builtin_ia32_ktestchi:
14142 IID = Intrinsic::x86_avx512_ktestc_w;
14143 break;
14144 case X86::BI__builtin_ia32_ktestzhi:
14145 IID = Intrinsic::x86_avx512_ktestz_w;
14146 break;
14147 case X86::BI__builtin_ia32_ktestcsi:
14148 IID = Intrinsic::x86_avx512_ktestc_d;
14149 break;
14150 case X86::BI__builtin_ia32_ktestzsi:
14151 IID = Intrinsic::x86_avx512_ktestz_d;
14152 break;
14153 case X86::BI__builtin_ia32_ktestcdi:
14154 IID = Intrinsic::x86_avx512_ktestc_q;
14155 break;
14156 case X86::BI__builtin_ia32_ktestzdi:
14157 IID = Intrinsic::x86_avx512_ktestz_q;
14158 break;
14159 }
14160
14161 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14162 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
14163 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
14164 Function *Intr = CGM.getIntrinsic(IID);
14165 return Builder.CreateCall(Intr, {LHS, RHS});
14166 }
14167
14168 case X86::BI__builtin_ia32_kaddqi:
14169 case X86::BI__builtin_ia32_kaddhi:
14170 case X86::BI__builtin_ia32_kaddsi:
14171 case X86::BI__builtin_ia32_kadddi: {
14172 Intrinsic::ID IID;
14173 switch (BuiltinID) {
14174 default: llvm_unreachable("Unsupported intrinsic!");
14175 case X86::BI__builtin_ia32_kaddqi:
14176 IID = Intrinsic::x86_avx512_kadd_b;
14177 break;
14178 case X86::BI__builtin_ia32_kaddhi:
14179 IID = Intrinsic::x86_avx512_kadd_w;
14180 break;
14181 case X86::BI__builtin_ia32_kaddsi:
14182 IID = Intrinsic::x86_avx512_kadd_d;
14183 break;
14184 case X86::BI__builtin_ia32_kadddi:
14185 IID = Intrinsic::x86_avx512_kadd_q;
14186 break;
14187 }
14188
14189 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14190 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
14191 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
14192 Function *Intr = CGM.getIntrinsic(IID);
14193 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
14194 return Builder.CreateBitCast(Res, Ops[0]->getType());
14195 }
14196 case X86::BI__builtin_ia32_kandqi:
14197 case X86::BI__builtin_ia32_kandhi:
14198 case X86::BI__builtin_ia32_kandsi:
14199 case X86::BI__builtin_ia32_kanddi:
14200 return EmitX86MaskLogic(*this, Instruction::And, Ops);
14201 case X86::BI__builtin_ia32_kandnqi:
14202 case X86::BI__builtin_ia32_kandnhi:
14203 case X86::BI__builtin_ia32_kandnsi:
14204 case X86::BI__builtin_ia32_kandndi:
14205 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
14206 case X86::BI__builtin_ia32_korqi:
14207 case X86::BI__builtin_ia32_korhi:
14208 case X86::BI__builtin_ia32_korsi:
14209 case X86::BI__builtin_ia32_kordi:
14210 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
14211 case X86::BI__builtin_ia32_kxnorqi:
14212 case X86::BI__builtin_ia32_kxnorhi:
14213 case X86::BI__builtin_ia32_kxnorsi:
14214 case X86::BI__builtin_ia32_kxnordi:
14215 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
14216 case X86::BI__builtin_ia32_kxorqi:
14217 case X86::BI__builtin_ia32_kxorhi:
14218 case X86::BI__builtin_ia32_kxorsi:
14219 case X86::BI__builtin_ia32_kxordi:
14220 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
14221 case X86::BI__builtin_ia32_knotqi:
14222 case X86::BI__builtin_ia32_knothi:
14223 case X86::BI__builtin_ia32_knotsi:
14224 case X86::BI__builtin_ia32_knotdi: {
14225 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14226 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
14227 return Builder.CreateBitCast(Builder.CreateNot(Res),
14228 Ops[0]->getType());
14229 }
14230 case X86::BI__builtin_ia32_kmovb:
14231 case X86::BI__builtin_ia32_kmovw:
14232 case X86::BI__builtin_ia32_kmovd:
14233 case X86::BI__builtin_ia32_kmovq: {
14234 // Bitcast to vXi1 type and then back to integer. This gets the mask
14235 // register type into the IR, but might be optimized out depending on
14236 // what's around it.
14237 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14238 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
14239 return Builder.CreateBitCast(Res, Ops[0]->getType());
14240 }
14241
14242 case X86::BI__builtin_ia32_kunpckdi:
14243 case X86::BI__builtin_ia32_kunpcksi:
14244 case X86::BI__builtin_ia32_kunpckhi: {
14245 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14246 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
14247 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
14248 int Indices[64];
14249 for (unsigned i = 0; i != NumElts; ++i)
14250 Indices[i] = i;
14251
14252 // First extract half of each vector. This gives better codegen than
14253 // doing it in a single shuffle.
14254 LHS = Builder.CreateShuffleVector(LHS, LHS,
14255 makeArrayRef(Indices, NumElts / 2));
14256 RHS = Builder.CreateShuffleVector(RHS, RHS,
14257 makeArrayRef(Indices, NumElts / 2));
14258 // Concat the vectors.
14259 // NOTE: Operands are swapped to match the intrinsic definition.
14260 Value *Res = Builder.CreateShuffleVector(RHS, LHS,
14261 makeArrayRef(Indices, NumElts));
14262 return Builder.CreateBitCast(Res, Ops[0]->getType());
14263 }
14264
14265 case X86::BI__builtin_ia32_vplzcntd_128:
14266 case X86::BI__builtin_ia32_vplzcntd_256:
14267 case X86::BI__builtin_ia32_vplzcntd_512:
14268 case X86::BI__builtin_ia32_vplzcntq_128:
14269 case X86::BI__builtin_ia32_vplzcntq_256:
14270 case X86::BI__builtin_ia32_vplzcntq_512: {
14271 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
14272 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
14273 }
14274 case X86::BI__builtin_ia32_sqrtss:
14275 case X86::BI__builtin_ia32_sqrtsd: {
14276 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14277 Function *F;
14278 if (Builder.getIsFPConstrained()) {
14279 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14280 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
14281 A->getType());
14282 A = Builder.CreateConstrainedFPCall(F, {A});
14283 } else {
14284 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
14285 A = Builder.CreateCall(F, {A});
14286 }
14287 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
14288 }
14289 case X86::BI__builtin_ia32_sqrtsh_round_mask:
14290 case X86::BI__builtin_ia32_sqrtsd_round_mask:
14291 case X86::BI__builtin_ia32_sqrtss_round_mask: {
14292 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
14293 // Lower to a plain sqrt only if the rounding mode is 4 (AKA
14294 // CUR_DIRECTION); otherwise keep the target-specific intrinsic.
14295 if (CC != 4) {
14296 Intrinsic::ID IID;
14297
14298 switch (BuiltinID) {
14299 default:
14300 llvm_unreachable("Unsupported intrinsic!");
14301 case X86::BI__builtin_ia32_sqrtsh_round_mask:
14302 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
14303 break;
14304 case X86::BI__builtin_ia32_sqrtsd_round_mask:
14305 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
14306 break;
14307 case X86::BI__builtin_ia32_sqrtss_round_mask:
14308 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
14309 break;
14310 }
14311 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
14312 }
14313 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14314 Function *F;
14315 if (Builder.getIsFPConstrained()) {
14316 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14317 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
14318 A->getType());
14319 A = Builder.CreateConstrainedFPCall(F, A);
14320 } else {
14321 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
14322 A = Builder.CreateCall(F, A);
14323 }
14324 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14325 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
14326 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
14327 }
14328 case X86::BI__builtin_ia32_sqrtpd256:
14329 case X86::BI__builtin_ia32_sqrtpd:
14330 case X86::BI__builtin_ia32_sqrtps256:
14331 case X86::BI__builtin_ia32_sqrtps:
14332 case X86::BI__builtin_ia32_sqrtph256:
14333 case X86::BI__builtin_ia32_sqrtph:
14334 case X86::BI__builtin_ia32_sqrtph512:
14335 case X86::BI__builtin_ia32_sqrtps512:
14336 case X86::BI__builtin_ia32_sqrtpd512: {
14337 if (Ops.size() == 2) {
14338 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
14339 // Lower to a plain sqrt only if the rounding mode is 4 (AKA
14340 // CUR_DIRECTION); otherwise keep the target-specific intrinsic.
14341 if (CC != 4) {
14342 Intrinsic::ID IID;
14343
14344 switch (BuiltinID) {
14345 default:
14346 llvm_unreachable("Unsupported intrinsic!");
14347 case X86::BI__builtin_ia32_sqrtph512:
14348 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
14349 break;
14350 case X86::BI__builtin_ia32_sqrtps512:
14351 IID = Intrinsic::x86_avx512_sqrt_ps_512;
14352 break;
14353 case X86::BI__builtin_ia32_sqrtpd512:
14354 IID = Intrinsic::x86_avx512_sqrt_pd_512;
14355 break;
14356 }
14357 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
14358 }
14359 }
14360 if (Builder.getIsFPConstrained()) {
14361 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14362 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
14363 Ops[0]->getType());
14364 return Builder.CreateConstrainedFPCall(F, Ops[0]);
14365 } else {
14366 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
14367 return Builder.CreateCall(F, Ops[0]);
14368 }
14369 }
14370
14371 case X86::BI__builtin_ia32_pmuludq128:
14372 case X86::BI__builtin_ia32_pmuludq256:
14373 case X86::BI__builtin_ia32_pmuludq512:
14374 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
14375
14376 case X86::BI__builtin_ia32_pmuldq128:
14377 case X86::BI__builtin_ia32_pmuldq256:
14378 case X86::BI__builtin_ia32_pmuldq512:
14379 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
14380
14381 case X86::BI__builtin_ia32_pternlogd512_mask:
14382 case X86::BI__builtin_ia32_pternlogq512_mask:
14383 case X86::BI__builtin_ia32_pternlogd128_mask:
14384 case X86::BI__builtin_ia32_pternlogd256_mask:
14385 case X86::BI__builtin_ia32_pternlogq128_mask:
14386 case X86::BI__builtin_ia32_pternlogq256_mask:
14387 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
14388
14389 case X86::BI__builtin_ia32_pternlogd512_maskz:
14390 case X86::BI__builtin_ia32_pternlogq512_maskz:
14391 case X86::BI__builtin_ia32_pternlogd128_maskz:
14392 case X86::BI__builtin_ia32_pternlogd256_maskz:
14393 case X86::BI__builtin_ia32_pternlogq128_maskz:
14394 case X86::BI__builtin_ia32_pternlogq256_maskz:
14395 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
14396
14397 case X86::BI__builtin_ia32_vpshldd128:
14398 case X86::BI__builtin_ia32_vpshldd256:
14399 case X86::BI__builtin_ia32_vpshldd512:
14400 case X86::BI__builtin_ia32_vpshldq128:
14401 case X86::BI__builtin_ia32_vpshldq256:
14402 case X86::BI__builtin_ia32_vpshldq512:
14403 case X86::BI__builtin_ia32_vpshldw128:
14404 case X86::BI__builtin_ia32_vpshldw256:
14405 case X86::BI__builtin_ia32_vpshldw512:
14406 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
14407
14408 case X86::BI__builtin_ia32_vpshrdd128:
14409 case X86::BI__builtin_ia32_vpshrdd256:
14410 case X86::BI__builtin_ia32_vpshrdd512:
14411 case X86::BI__builtin_ia32_vpshrdq128:
14412 case X86::BI__builtin_ia32_vpshrdq256:
14413 case X86::BI__builtin_ia32_vpshrdq512:
14414 case X86::BI__builtin_ia32_vpshrdw128:
14415 case X86::BI__builtin_ia32_vpshrdw256:
14416 case X86::BI__builtin_ia32_vpshrdw512:
14417 // Ops 0 and 1 are swapped.
14418 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
14419
14420 case X86::BI__builtin_ia32_vpshldvd128:
14421 case X86::BI__builtin_ia32_vpshldvd256:
14422 case X86::BI__builtin_ia32_vpshldvd512:
14423 case X86::BI__builtin_ia32_vpshldvq128:
14424 case X86::BI__builtin_ia32_vpshldvq256:
14425 case X86::BI__builtin_ia32_vpshldvq512:
14426 case X86::BI__builtin_ia32_vpshldvw128:
14427 case X86::BI__builtin_ia32_vpshldvw256:
14428 case X86::BI__builtin_ia32_vpshldvw512:
14429 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
14430
14431 case X86::BI__builtin_ia32_vpshrdvd128:
14432 case X86::BI__builtin_ia32_vpshrdvd256:
14433 case X86::BI__builtin_ia32_vpshrdvd512:
14434 case X86::BI__builtin_ia32_vpshrdvq128:
14435 case X86::BI__builtin_ia32_vpshrdvq256:
14436 case X86::BI__builtin_ia32_vpshrdvq512:
14437 case X86::BI__builtin_ia32_vpshrdvw128:
14438 case X86::BI__builtin_ia32_vpshrdvw256:
14439 case X86::BI__builtin_ia32_vpshrdvw512:
14440 // Ops 0 and 1 are swapped.
14441 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
14442
14443 // Reductions
14444 case X86::BI__builtin_ia32_reduce_fadd_pd512:
14445 case X86::BI__builtin_ia32_reduce_fadd_ps512:
14446 case X86::BI__builtin_ia32_reduce_fadd_ph512:
14447 case X86::BI__builtin_ia32_reduce_fadd_ph256:
14448 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
14449 Function *F =
14450 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
14451 Builder.getFastMathFlags().setAllowReassoc();
14452 return Builder.CreateCall(F, {Ops[0], Ops[1]});
14453 }
14454 case X86::BI__builtin_ia32_reduce_fmul_pd512:
14455 case X86::BI__builtin_ia32_reduce_fmul_ps512:
14456 case X86::BI__builtin_ia32_reduce_fmul_ph512:
14457 case X86::BI__builtin_ia32_reduce_fmul_ph256:
14458 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
14459 Function *F =
14460 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
14461 Builder.getFastMathFlags().setAllowReassoc();
14462 return Builder.CreateCall(F, {Ops[0], Ops[1]});
14463 }
14464 case X86::BI__builtin_ia32_reduce_fmax_pd512:
14465 case X86::BI__builtin_ia32_reduce_fmax_ps512:
14466 case X86::BI__builtin_ia32_reduce_fmax_ph512:
14467 case X86::BI__builtin_ia32_reduce_fmax_ph256:
14468 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
14469 Function *F =
14470 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
14471 Builder.getFastMathFlags().setNoNaNs();
14472 return Builder.CreateCall(F, {Ops[0]});
14473 }
14474 case X86::BI__builtin_ia32_reduce_fmin_pd512:
14475 case X86::BI__builtin_ia32_reduce_fmin_ps512:
14476 case X86::BI__builtin_ia32_reduce_fmin_ph512:
14477 case X86::BI__builtin_ia32_reduce_fmin_ph256:
14478 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
14479 Function *F =
14480 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
14481 Builder.getFastMathFlags().setNoNaNs();
14482 return Builder.CreateCall(F, {Ops[0]});
14483 }
14484
14485 // 3DNow!
14486 case X86::BI__builtin_ia32_pswapdsf:
14487 case X86::BI__builtin_ia32_pswapdsi: {
14488 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
14489 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
14490 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
14491 return Builder.CreateCall(F, Ops, "pswapd");
14492 }
14493 case X86::BI__builtin_ia32_rdrand16_step:
14494 case X86::BI__builtin_ia32_rdrand32_step:
14495 case X86::BI__builtin_ia32_rdrand64_step:
14496 case X86::BI__builtin_ia32_rdseed16_step:
14497 case X86::BI__builtin_ia32_rdseed32_step:
14498 case X86::BI__builtin_ia32_rdseed64_step: {
14499 Intrinsic::ID ID;
14500 switch (BuiltinID) {
14501 default: llvm_unreachable("Unsupported intrinsic!");
14502 case X86::BI__builtin_ia32_rdrand16_step:
14503 ID = Intrinsic::x86_rdrand_16;
14504 break;
14505 case X86::BI__builtin_ia32_rdrand32_step:
14506 ID = Intrinsic::x86_rdrand_32;
14507 break;
14508 case X86::BI__builtin_ia32_rdrand64_step:
14509 ID = Intrinsic::x86_rdrand_64;
14510 break;
14511 case X86::BI__builtin_ia32_rdseed16_step:
14512 ID = Intrinsic::x86_rdseed_16;
14513 break;
14514 case X86::BI__builtin_ia32_rdseed32_step:
14515 ID = Intrinsic::x86_rdseed_32;
14516 break;
14517 case X86::BI__builtin_ia32_rdseed64_step:
14518 ID = Intrinsic::x86_rdseed_64;
14519 break;
14520 }
14521
14522 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
14523 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
14524 Ops[0]);
14525 return Builder.CreateExtractValue(Call, 1);
14526 }
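    // Each of these intrinsics returns a pair: element 0 is the random
    // value, which is stored through the pointer in Ops[0], and element 1 is
    // the success flag (nonzero on success), which becomes the builtin's
    // return value.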
14527 case X86::BI__builtin_ia32_addcarryx_u32:
14528 case X86::BI__builtin_ia32_addcarryx_u64:
14529 case X86::BI__builtin_ia32_subborrow_u32:
14530 case X86::BI__builtin_ia32_subborrow_u64: {
14531 Intrinsic::ID IID;
14532 switch (BuiltinID) {
14533 default: llvm_unreachable("Unsupported intrinsic!");
14534 case X86::BI__builtin_ia32_addcarryx_u32:
14535 IID = Intrinsic::x86_addcarry_32;
14536 break;
14537 case X86::BI__builtin_ia32_addcarryx_u64:
14538 IID = Intrinsic::x86_addcarry_64;
14539 break;
14540 case X86::BI__builtin_ia32_subborrow_u32:
14541 IID = Intrinsic::x86_subborrow_32;
14542 break;
14543 case X86::BI__builtin_ia32_subborrow_u64:
14544 IID = Intrinsic::x86_subborrow_64;
14545 break;
14546 }
14547
14548 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
14549 { Ops[0], Ops[1], Ops[2] });
14550 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
14551 Ops[3]);
14552 return Builder.CreateExtractValue(Call, 0);
14553 }
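    // These intrinsics take {carry-in, a, b} and return a pair: element 1 is
    // the arithmetic result, stored through the out-pointer in Ops[3], and
    // element 0 is the carry/borrow-out flag, which is returned.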
14554
14555 case X86::BI__builtin_ia32_fpclassps128_mask:
14556 case X86::BI__builtin_ia32_fpclassps256_mask:
14557 case X86::BI__builtin_ia32_fpclassps512_mask:
14558 case X86::BI__builtin_ia32_fpclassph128_mask:
14559 case X86::BI__builtin_ia32_fpclassph256_mask:
14560 case X86::BI__builtin_ia32_fpclassph512_mask:
14561 case X86::BI__builtin_ia32_fpclasspd128_mask:
14562 case X86::BI__builtin_ia32_fpclasspd256_mask:
14563 case X86::BI__builtin_ia32_fpclasspd512_mask: {
14564 unsigned NumElts =
14565 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14566 Value *MaskIn = Ops[2];
14567 Ops.erase(&Ops[2]);
14568
14569 Intrinsic::ID ID;
14570 switch (BuiltinID) {
14571 default: llvm_unreachable("Unsupported intrinsic!");
14572 case X86::BI__builtin_ia32_fpclassph128_mask:
14573 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
14574 break;
14575 case X86::BI__builtin_ia32_fpclassph256_mask:
14576 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
14577 break;
14578 case X86::BI__builtin_ia32_fpclassph512_mask:
14579 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
14580 break;
14581 case X86::BI__builtin_ia32_fpclassps128_mask:
14582 ID = Intrinsic::x86_avx512_fpclass_ps_128;
14583 break;
14584 case X86::BI__builtin_ia32_fpclassps256_mask:
14585 ID = Intrinsic::x86_avx512_fpclass_ps_256;
14586 break;
14587 case X86::BI__builtin_ia32_fpclassps512_mask:
14588 ID = Intrinsic::x86_avx512_fpclass_ps_512;
14589 break;
14590 case X86::BI__builtin_ia32_fpclasspd128_mask:
14591 ID = Intrinsic::x86_avx512_fpclass_pd_128;
14592 break;
14593 case X86::BI__builtin_ia32_fpclasspd256_mask:
14594 ID = Intrinsic::x86_avx512_fpclass_pd_256;
14595 break;
14596 case X86::BI__builtin_ia32_fpclasspd512_mask:
14597 ID = Intrinsic::x86_avx512_fpclass_pd_512;
14598 break;
14599 }
14600
14601 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
14602 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
14603 }
14604
14605 case X86::BI__builtin_ia32_vp2intersect_q_512:
14606 case X86::BI__builtin_ia32_vp2intersect_q_256:
14607 case X86::BI__builtin_ia32_vp2intersect_q_128:
14608 case X86::BI__builtin_ia32_vp2intersect_d_512:
14609 case X86::BI__builtin_ia32_vp2intersect_d_256:
14610 case X86::BI__builtin_ia32_vp2intersect_d_128: {
14611 unsigned NumElts =
14612 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14613 Intrinsic::ID ID;
14614
14615 switch (BuiltinID) {
14616 default: llvm_unreachable("Unsupported intrinsic!");
14617 case X86::BI__builtin_ia32_vp2intersect_q_512:
14618 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
14619 break;
14620 case X86::BI__builtin_ia32_vp2intersect_q_256:
14621 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
14622 break;
14623 case X86::BI__builtin_ia32_vp2intersect_q_128:
14624 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
14625 break;
14626 case X86::BI__builtin_ia32_vp2intersect_d_512:
14627 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
14628 break;
14629 case X86::BI__builtin_ia32_vp2intersect_d_256:
14630 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
14631 break;
14632 case X86::BI__builtin_ia32_vp2intersect_d_128:
14633 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
14634 break;
14635 }
14636
14637 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
14638 Value *Result = Builder.CreateExtractValue(Call, 0);
14639 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
14640 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
14641
14642 Result = Builder.CreateExtractValue(Call, 1);
14643 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
14644 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
14645 }
14646
14647 case X86::BI__builtin_ia32_vpmultishiftqb128:
14648 case X86::BI__builtin_ia32_vpmultishiftqb256:
14649 case X86::BI__builtin_ia32_vpmultishiftqb512: {
14650 Intrinsic::ID ID;
14651 switch (BuiltinID) {
14652 default: llvm_unreachable("Unsupported intrinsic!");
14653 case X86::BI__builtin_ia32_vpmultishiftqb128:
14654 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
14655 break;
14656 case X86::BI__builtin_ia32_vpmultishiftqb256:
14657 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
14658 break;
14659 case X86::BI__builtin_ia32_vpmultishiftqb512:
14660 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
14661 break;
14662 }
14663
14664 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
14665 }
14666
14667 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
14668 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
14669 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
14670 unsigned NumElts =
14671 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14672 Value *MaskIn = Ops[2];
14673 Ops.erase(&Ops[2]);
14674
14675 Intrinsic::ID ID;
14676 switch (BuiltinID) {
14677 default: llvm_unreachable("Unsupported intrinsic!");
14678 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
14679 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
14680 break;
14681 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
14682 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
14683 break;
14684 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
14685 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
14686 break;
14687 }
14688
14689 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
14690 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
14691 }
14692
14693 // packed comparison intrinsics
14694 case X86::BI__builtin_ia32_cmpeqps:
14695 case X86::BI__builtin_ia32_cmpeqpd:
14696 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
14697 case X86::BI__builtin_ia32_cmpltps:
14698 case X86::BI__builtin_ia32_cmpltpd:
14699 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
14700 case X86::BI__builtin_ia32_cmpleps:
14701 case X86::BI__builtin_ia32_cmplepd:
14702 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
14703 case X86::BI__builtin_ia32_cmpunordps:
14704 case X86::BI__builtin_ia32_cmpunordpd:
14705 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
14706 case X86::BI__builtin_ia32_cmpneqps:
14707 case X86::BI__builtin_ia32_cmpneqpd:
14708 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
14709 case X86::BI__builtin_ia32_cmpnltps:
14710 case X86::BI__builtin_ia32_cmpnltpd:
14711 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
14712 case X86::BI__builtin_ia32_cmpnleps:
14713 case X86::BI__builtin_ia32_cmpnlepd:
14714 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
14715 case X86::BI__builtin_ia32_cmpordps:
14716 case X86::BI__builtin_ia32_cmpordpd:
14717 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
14718 case X86::BI__builtin_ia32_cmpph128_mask:
14719 case X86::BI__builtin_ia32_cmpph256_mask:
14720 case X86::BI__builtin_ia32_cmpph512_mask:
14721 case X86::BI__builtin_ia32_cmpps128_mask:
14722 case X86::BI__builtin_ia32_cmpps256_mask:
14723 case X86::BI__builtin_ia32_cmpps512_mask:
14724 case X86::BI__builtin_ia32_cmppd128_mask:
14725 case X86::BI__builtin_ia32_cmppd256_mask:
14726 case X86::BI__builtin_ia32_cmppd512_mask:
14727 IsMaskFCmp = true;
14728 LLVM_FALLTHROUGH;
14729 case X86::BI__builtin_ia32_cmpps:
14730 case X86::BI__builtin_ia32_cmpps256:
14731 case X86::BI__builtin_ia32_cmppd:
14732 case X86::BI__builtin_ia32_cmppd256: {
14733 // Lower vector comparisons to fcmp instructions, while ignoring both
14734 // the signalling behaviour requested
14735 // and the rounding mode requested.
14736 // This is only possible if the fp-model is not strict and FENV_ACCESS is off.
14737
14738 // The third argument is the comparison condition, an integer in the
14739 // range [0, 31].
14740 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
14741
14742 // Lowering to IR fcmp instruction.
14743 // Ignoring requested signaling behaviour,
14744 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
14745 FCmpInst::Predicate Pred;
14746 bool IsSignaling;
14747 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
14748 // behavior is inverted. We'll handle that after the switch.
14749 switch (CC & 0xf) {
14750 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
14751 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
14752 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
14753 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
14754 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
14755 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
14756 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
14757 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
14758 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
14759 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
14760 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
14761 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
14762 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
14763 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
14764 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
14765 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
14766 default: llvm_unreachable("Unhandled CC");
14767 }
14768
14769 // Invert the signalling behavior for 16-31.
14770 if (CC & 0x10)
14771 IsSignaling = !IsSignaling;
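    // For example, _CMP_GT_OS (0x0e) and _CMP_GT_OQ (0x1e) both select
    // FCMP_OGT; they differ only in IsSignaling, which the 0x10 bit test
    // above flips from true to false for the quiet form.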
14772
14773 // If the predicate is true or false and we're using constrained intrinsics,
14774 // we don't have a compare intrinsic we can use. Just use the legacy X86
14775 // specific intrinsic.
14776 // If the intrinsic is mask enabled and we're using constrained intrinsics,
14777 // use the legacy X86 specific intrinsic.
14778 if (Builder.getIsFPConstrained() &&
14779 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
14780 IsMaskFCmp)) {
14781
14782 Intrinsic::ID IID;
14783 switch (BuiltinID) {
14784 default: llvm_unreachable("Unexpected builtin");
14785 case X86::BI__builtin_ia32_cmpps:
14786 IID = Intrinsic::x86_sse_cmp_ps;
14787 break;
14788 case X86::BI__builtin_ia32_cmpps256:
14789 IID = Intrinsic::x86_avx_cmp_ps_256;
14790 break;
14791 case X86::BI__builtin_ia32_cmppd:
14792 IID = Intrinsic::x86_sse2_cmp_pd;
14793 break;
14794 case X86::BI__builtin_ia32_cmppd256:
14795 IID = Intrinsic::x86_avx_cmp_pd_256;
14796 break;
14797 case X86::BI__builtin_ia32_cmpps512_mask:
14798 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
14799 break;
14800 case X86::BI__builtin_ia32_cmppd512_mask:
14801 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
14802 break;
14803 case X86::BI__builtin_ia32_cmpps128_mask:
14804 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
14805 break;
14806 case X86::BI__builtin_ia32_cmpps256_mask:
14807 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
14808 break;
14809 case X86::BI__builtin_ia32_cmppd128_mask:
14810 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
14811 break;
14812 case X86::BI__builtin_ia32_cmppd256_mask:
14813 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
14814 break;
14815 }
14816
14817 Function *Intr = CGM.getIntrinsic(IID);
14818 if (IsMaskFCmp) {
14819 unsigned NumElts =
14820 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14821 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
14822 Value *Cmp = Builder.CreateCall(Intr, Ops);
14823 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
14824 }
14825
14826 return Builder.CreateCall(Intr, Ops);
14827 }
14828
14829 // Builtins without the _mask suffix return a vector of integers
14830 // of the same width as the input vectors
14831 if (IsMaskFCmp) {
14832 // We ignore SAE if strict FP is disabled. We only keep precise
14833 // exception behavior under strict FP.
14834 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
14835 // object will be required.
14836 unsigned NumElts =
14837 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14838 Value *Cmp;
14839 if (IsSignaling)
14840 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
14841 else
14842 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
14843 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
14844 }
14845
14846 return getVectorFCmpIR(Pred, IsSignaling);
14847 }
14848
14849 // SSE scalar comparison intrinsics
14850 case X86::BI__builtin_ia32_cmpeqss:
14851 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
14852 case X86::BI__builtin_ia32_cmpltss:
14853 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
14854 case X86::BI__builtin_ia32_cmpless:
14855 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
14856 case X86::BI__builtin_ia32_cmpunordss:
14857 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
14858 case X86::BI__builtin_ia32_cmpneqss:
14859 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
14860 case X86::BI__builtin_ia32_cmpnltss:
14861 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
14862 case X86::BI__builtin_ia32_cmpnless:
14863 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
14864 case X86::BI__builtin_ia32_cmpordss:
14865 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
14866 case X86::BI__builtin_ia32_cmpeqsd:
14867 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
14868 case X86::BI__builtin_ia32_cmpltsd:
14869 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
14870 case X86::BI__builtin_ia32_cmplesd:
14871 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
14872 case X86::BI__builtin_ia32_cmpunordsd:
14873 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
14874 case X86::BI__builtin_ia32_cmpneqsd:
14875 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
14876 case X86::BI__builtin_ia32_cmpnltsd:
14877 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
14878 case X86::BI__builtin_ia32_cmpnlesd:
14879 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
14880 case X86::BI__builtin_ia32_cmpordsd:
14881 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
14882
14883 // f16c half2float intrinsics
14884 case X86::BI__builtin_ia32_vcvtph2ps:
14885 case X86::BI__builtin_ia32_vcvtph2ps256:
14886 case X86::BI__builtin_ia32_vcvtph2ps_mask:
14887 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
14888 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
14889 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14890 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
14891 }
14892
14893 // AVX512 bf16 intrinsics
14894 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
14895 Ops[2] = getMaskVecValue(
14896 *this, Ops[2],
14897 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
14898 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
14899 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
14900 }
14901 case X86::BI__builtin_ia32_cvtsbf162ss_32:
14902 return EmitX86CvtBF16ToFloatExpr(*this, E, Ops);
14903
14904 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
14905 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
14906 Intrinsic::ID IID;
14907 switch (BuiltinID) {
14908 default: llvm_unreachable("Unsupported intrinsic!");
14909 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
14910 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
14911 break;
14912 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
14913 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
14914 break;
14915 }
14916 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
14917 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
14918 }
14919
14920 case X86::BI__cpuid:
14921 case X86::BI__cpuidex: {
14922 Value *FuncId = EmitScalarExpr(E->getArg(1));
14923 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
14924 ? EmitScalarExpr(E->getArg(2))
14925 : llvm::ConstantInt::get(Int32Ty, 0);
14926
14927 llvm::StructType *CpuidRetTy =
14928 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
14929 llvm::FunctionType *FTy =
14930 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
14931
14932 StringRef Asm, Constraints;
14933 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
14934 Asm = "cpuid";
14935 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
14936 } else {
14937 // x86-64 uses %rbx as the base register, so preserve it.
14938 Asm = "xchgq %rbx, ${1:q}\n"
14939 "cpuid\n"
14940 "xchgq %rbx, ${1:q}";
14941 Constraints = "={ax},=r,={cx},={dx},0,2";
14942 }
14943
14944 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
14945 /*hasSideEffects=*/false);
14946 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
14947 Value *BasePtr = EmitScalarExpr(E->getArg(0));
14948 Value *Store = nullptr;
14949 for (unsigned i = 0; i < 4; i++) {
14950 Value *Extracted = Builder.CreateExtractValue(IACall, i);
14951 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
14952 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
14953 }
14954
14955 // Return the last store instruction to signal that we have emitted
14956 // the intrinsic.
14957 return Store;
14958 }
14959
14960 case X86::BI__emul:
14961 case X86::BI__emulu: {
14962 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
14963 bool isSigned = (BuiltinID == X86::BI__emul);
14964 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
14965 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
14966 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
14967 }
14968 case X86::BI__mulh:
14969 case X86::BI__umulh:
14970 case X86::BI_mul128:
14971 case X86::BI_umul128: {
14972 llvm::Type *ResType = ConvertType(E->getType());
14973 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
14974
14975 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
14976 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
14977 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
14978
14979 Value *MulResult, *HigherBits;
14980 if (IsSigned) {
14981 MulResult = Builder.CreateNSWMul(LHS, RHS);
14982 HigherBits = Builder.CreateAShr(MulResult, 64);
14983 } else {
14984 MulResult = Builder.CreateNUWMul(LHS, RHS);
14985 HigherBits = Builder.CreateLShr(MulResult, 64);
14986 }
14987 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
14988
14989 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
14990 return HigherBits;
14991
14992 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
14993 Builder.CreateStore(HigherBits, HighBitsAddress);
14994 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
14995 }
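    // For example, __mulh(a, b) sign-extends both operands to 128 bits,
    // multiplies, and returns bits 127:64 of the product; _mul128 also
    // stores those high bits through its third argument and returns the low
    // 64 bits.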
14996
14997 case X86::BI__faststorefence: {
14998 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
14999 llvm::SyncScope::System);
15000 }
15001 case X86::BI__shiftleft128:
15002 case X86::BI__shiftright128: {
15003 llvm::Function *F = CGM.getIntrinsic(
15004 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
15005 Int64Ty);
15006 // Flip low/high ops and zero-extend amount to matching type.
15007 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
15008 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
15009 std::swap(Ops[0], Ops[1]);
15010 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
15011 return Builder.CreateCall(F, Ops);
15012 }
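    // For example, __shiftleft128(Low, High, 8) becomes fshl(High, Low, 8),
    // i.e. (High << 8) | (Low >> 56): the high 64 bits of the 128-bit value
    // High:Low shifted left by eight.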
15013 case X86::BI_ReadWriteBarrier:
15014 case X86::BI_ReadBarrier:
15015 case X86::BI_WriteBarrier: {
15016 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
15017 llvm::SyncScope::SingleThread);
15018 }
15019
15020 case X86::BI_AddressOfReturnAddress: {
15021 Function *F =
15022 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
15023 return Builder.CreateCall(F);
15024 }
15025 case X86::BI__stosb: {
15026 // We treat __stosb as a volatile memset - it may not generate a "rep
15027 // stosb" instruction, but it will create a memset that won't be optimized away.
15028 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
15029 }
15030 case X86::BI__ud2:
15031 // llvm.trap lowers to a ud2a instruction on x86.
15032 return EmitTrapCall(Intrinsic::trap);
15033 case X86::BI__int2c: {
15034 // This syscall signals a driver assertion failure in x86 NT kernels.
15035 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
15036 llvm::InlineAsm *IA =
15037 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
15038 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
15039 getLLVMContext(), llvm::AttributeList::FunctionIndex,
15040 llvm::Attribute::NoReturn);
15041 llvm::CallInst *CI = Builder.CreateCall(IA);
15042 CI->setAttributes(NoReturnAttr);
15043 return CI;
15044 }
15045 case X86::BI__readfsbyte:
15046 case X86::BI__readfsword:
15047 case X86::BI__readfsdword:
15048 case X86::BI__readfsqword: {
15049 llvm::Type *IntTy = ConvertType(E->getType());
15050 Value *Ptr =
15051 Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257));
15052 LoadInst *Load = Builder.CreateAlignedLoad(
15053 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
15054 Load->setVolatile(true);
15055 return Load;
15056 }
15057 case X86::BI__readgsbyte:
15058 case X86::BI__readgsword:
15059 case X86::BI__readgsdword:
15060 case X86::BI__readgsqword: {
15061 llvm::Type *IntTy = ConvertType(E->getType());
15062 Value *Ptr =
15063 Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256));
15064 LoadInst *Load = Builder.CreateAlignedLoad(
15065 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
15066 Load->setVolatile(true);
15067 return Load;
15068 }
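    // (Address spaces 257 and 256 are how LLVM's x86 backend models the FS
    // and GS segments, respectively, so these loads read relative to the
    // corresponding segment base.)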
15069 case X86::BI__builtin_ia32_encodekey128_u32: {
15070 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
15071
15072 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
15073
15074 for (int i = 0; i < 3; ++i) {
15075 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
15076 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
15077 Ptr = Builder.CreateBitCast(
15078 Ptr, llvm::PointerType::getUnqual(Extract->getType()));
15079 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
15080 }
15081
15082 return Builder.CreateExtractValue(Call, 0);
15083 }
15084 case X86::BI__builtin_ia32_encodekey256_u32: {
15085 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
15086
15087 Value *Call =
15088 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
15089
15090 for (int i = 0; i < 4; ++i) {
15091 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
15092 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
15093 Ptr = Builder.CreateBitCast(
15094 Ptr, llvm::PointerType::getUnqual(Extract->getType()));
15095 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
15096 }
15097
15098 return Builder.CreateExtractValue(Call, 0);
15099 }
15100 case X86::BI__builtin_ia32_aesenc128kl_u8:
15101 case X86::BI__builtin_ia32_aesdec128kl_u8:
15102 case X86::BI__builtin_ia32_aesenc256kl_u8:
15103 case X86::BI__builtin_ia32_aesdec256kl_u8: {
15104 Intrinsic::ID IID;
15105 StringRef BlockName;
15106 switch (BuiltinID) {
15107 default:
15108 llvm_unreachable("Unexpected builtin");
15109 case X86::BI__builtin_ia32_aesenc128kl_u8:
15110 IID = Intrinsic::x86_aesenc128kl;
15111 BlockName = "aesenc128kl";
15112 break;
15113 case X86::BI__builtin_ia32_aesdec128kl_u8:
15114 IID = Intrinsic::x86_aesdec128kl;
15115 BlockName = "aesdec128kl";
15116 break;
15117 case X86::BI__builtin_ia32_aesenc256kl_u8:
15118 IID = Intrinsic::x86_aesenc256kl;
15119 BlockName = "aesenc256kl";
15120 break;
15121 case X86::BI__builtin_ia32_aesdec256kl_u8:
15122 IID = Intrinsic::x86_aesdec256kl;
15123 BlockName = "aesdec256kl";
15124 break;
15125 }
15126
15127 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
15128
15129 BasicBlock *NoError =
15130 createBasicBlock(BlockName + "_no_error", this->CurFn);
15131 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
15132 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
15133
15134 Value *Ret = Builder.CreateExtractValue(Call, 0);
15135 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
15136 Value *Out = Builder.CreateExtractValue(Call, 1);
15137 Builder.CreateCondBr(Succ, NoError, Error);
15138
15139 Builder.SetInsertPoint(NoError);
15140 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
15141 Builder.CreateBr(End);
15142
15143 Builder.SetInsertPoint(Error);
15144 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
15145 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
15146 Builder.CreateBr(End);
15147
15148 Builder.SetInsertPoint(End);
15149 return Builder.CreateExtractValue(Call, 0);
15150 }
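// A sketch of the control flow emitted above, assuming a call such as
// __builtin_ia32_aesenc128kl_u8(&out, data, handle): the first result of the
// intrinsic is a success flag, so the branch stores either the encrypted
// block or a zero vector and then returns the flag:
//
//   if (flag) *out = result;   // <BlockName>_no_error
//   else      *out = 0;        // <BlockName>_error
//   return flag;               // <BlockName>_end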
15151 case X86::BI__builtin_ia32_aesencwide128kl_u8:
15152 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
15153 case X86::BI__builtin_ia32_aesencwide256kl_u8:
15154 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
15155 Intrinsic::ID IID;
15156 StringRef BlockName;
15157 switch (BuiltinID) {
15158 case X86::BI__builtin_ia32_aesencwide128kl_u8:
15159 IID = Intrinsic::x86_aesencwide128kl;
15160 BlockName = "aesencwide128kl";
15161 break;
15162 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
15163 IID = Intrinsic::x86_aesdecwide128kl;
15164 BlockName = "aesdecwide128kl";
15165 break;
15166 case X86::BI__builtin_ia32_aesencwide256kl_u8:
15167 IID = Intrinsic::x86_aesencwide256kl;
15168 BlockName = "aesencwide256kl";
15169 break;
15170 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
15171 IID = Intrinsic::x86_aesdecwide256kl;
15172 BlockName = "aesdecwide256kl";
15173 break;
15174 }
15175
15176 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
15177 Value *InOps[9];
15178 InOps[0] = Ops[2];
15179 for (int i = 0; i != 8; ++i) {
15180 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
15181 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
15182 }
15183
15184 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
15185
15186 BasicBlock *NoError =
15187 createBasicBlock(BlockName + "_no_error", this->CurFn);
15188 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
15189 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
15190
15191 Value *Ret = Builder.CreateExtractValue(Call, 0);
15192 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
15193 Builder.CreateCondBr(Succ, NoError, Error);
15194
15195 Builder.SetInsertPoint(NoError);
15196 for (int i = 0; i != 8; ++i) {
15197 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
15198 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
15199 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
15200 }
15201 Builder.CreateBr(End);
15202
15203 Builder.SetInsertPoint(Error);
15204 for (int i = 0; i != 8; ++i) {
15205 Value *Out = Builder.CreateExtractValue(Call, i + 1);
15206 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
15207 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
15208 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
15209 }
15210 Builder.CreateBr(End);
15211
15212 Builder.SetInsertPoint(End);
15213 return Builder.CreateExtractValue(Call, 0);
15214 }
15215 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
15216 IsConjFMA = true;
15217 LLVM_FALLTHROUGH;
15218 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
15219 Intrinsic::ID IID = IsConjFMA
15220 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
15221 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
15222 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15223 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
15224 }
15225 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
15226 IsConjFMA = true;
15227 LLVM_FALLTHROUGH;
15228 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
15229 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
15230 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
15231 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15232 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
15233 return EmitX86Select(*this, And, Call, Ops[0]);
15234 }
15235 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
15236 IsConjFMA = true;
15237 LLVM_FALLTHROUGH;
15238 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
15239 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
15240 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
15241 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15242 static constexpr int Mask[] = {0, 5, 6, 7};
15243 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
15244 }
15245 }
15246}
15247
15248Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
15249 const CallExpr *E) {
15250 // Do not emit the builtin arguments inside the argument list of a function
15251 // call, because the evaluation order of function arguments is not specified
15252 // in C++. This is important when testing to ensure the arguments are emitted
15253 // in the same order every time. E.g.:
15254 // Instead of:
15255 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
15256 // EmitScalarExpr(E->getArg(1)), "swdiv");
15257 // Use:
15258 // Value *Op0 = EmitScalarExpr(E->getArg(0));
15259 // Value *Op1 = EmitScalarExpr(E->getArg(1));
15260 // return Builder.CreateFDiv(Op0, Op1, "swdiv")
15261
15262 Intrinsic::ID ID = Intrinsic::not_intrinsic;
15263
15264 switch (BuiltinID) {
15265 default: return nullptr;
15266
15267 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
15268 // call __builtin_readcyclecounter.
15269 case PPC::BI__builtin_ppc_get_timebase:
15270 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
15271
15272 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
15273 case PPC::BI__builtin_altivec_lvx:
15274 case PPC::BI__builtin_altivec_lvxl:
15275 case PPC::BI__builtin_altivec_lvebx:
15276 case PPC::BI__builtin_altivec_lvehx:
15277 case PPC::BI__builtin_altivec_lvewx:
15278 case PPC::BI__builtin_altivec_lvsl:
15279 case PPC::BI__builtin_altivec_lvsr:
15280 case PPC::BI__builtin_vsx_lxvd2x:
15281 case PPC::BI__builtin_vsx_lxvw4x:
15282 case PPC::BI__builtin_vsx_lxvd2x_be:
15283 case PPC::BI__builtin_vsx_lxvw4x_be:
15284 case PPC::BI__builtin_vsx_lxvl:
15285 case PPC::BI__builtin_vsx_lxvll:
15286 {
15287 SmallVector<Value *, 2> Ops;
15288 Ops.push_back(EmitScalarExpr(E->getArg(0)));
15289 Ops.push_back(EmitScalarExpr(E->getArg(1)));
15290 if(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
15291 BuiltinID == PPC::BI__builtin_vsx_lxvll){
15292 Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
15293 }else {
15294 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
15295 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
15296 Ops.pop_back();
15297 }
15298
15299 switch (BuiltinID) {
15300 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
15301 case PPC::BI__builtin_altivec_lvx:
15302 ID = Intrinsic::ppc_altivec_lvx;
15303 break;
15304 case PPC::BI__builtin_altivec_lvxl:
15305 ID = Intrinsic::ppc_altivec_lvxl;
15306 break;
15307 case PPC::BI__builtin_altivec_lvebx:
15308 ID = Intrinsic::ppc_altivec_lvebx;
15309 break;
15310 case PPC::BI__builtin_altivec_lvehx:
15311 ID = Intrinsic::ppc_altivec_lvehx;
15312 break;
15313 case PPC::BI__builtin_altivec_lvewx:
15314 ID = Intrinsic::ppc_altivec_lvewx;
15315 break;
15316 case PPC::BI__builtin_altivec_lvsl:
15317 ID = Intrinsic::ppc_altivec_lvsl;
15318 break;
15319 case PPC::BI__builtin_altivec_lvsr:
15320 ID = Intrinsic::ppc_altivec_lvsr;
15321 break;
15322 case PPC::BI__builtin_vsx_lxvd2x:
15323 ID = Intrinsic::ppc_vsx_lxvd2x;
15324 break;
15325 case PPC::BI__builtin_vsx_lxvw4x:
15326 ID = Intrinsic::ppc_vsx_lxvw4x;
15327 break;
15328 case PPC::BI__builtin_vsx_lxvd2x_be:
15329 ID = Intrinsic::ppc_vsx_lxvd2x_be;
15330 break;
15331 case PPC::BI__builtin_vsx_lxvw4x_be:
15332 ID = Intrinsic::ppc_vsx_lxvw4x_be;
15333 break;
15334 case PPC::BI__builtin_vsx_lxvl:
15335 ID = Intrinsic::ppc_vsx_lxvl;
15336 break;
15337 case PPC::BI__builtin_vsx_lxvll:
15338 ID = Intrinsic::ppc_vsx_lxvll;
15339 break;
15340 }
15341 llvm::Function *F = CGM.getIntrinsic(ID);
15342 return Builder.CreateCall(F, Ops, "");
15343 }
15344
15345 // vec_st, vec_xst_be
15346 case PPC::BI__builtin_altivec_stvx:
15347 case PPC::BI__builtin_altivec_stvxl:
15348 case PPC::BI__builtin_altivec_stvebx:
15349 case PPC::BI__builtin_altivec_stvehx:
15350 case PPC::BI__builtin_altivec_stvewx:
15351 case PPC::BI__builtin_vsx_stxvd2x:
15352 case PPC::BI__builtin_vsx_stxvw4x:
15353 case PPC::BI__builtin_vsx_stxvd2x_be:
15354 case PPC::BI__builtin_vsx_stxvw4x_be:
15355 case PPC::BI__builtin_vsx_stxvl:
15356 case PPC::BI__builtin_vsx_stxvll:
15357 {
15358 SmallVector<Value *, 3> Ops;
15359 Ops.push_back(EmitScalarExpr(E->getArg(0)));
15360 Ops.push_back(EmitScalarExpr(E->getArg(1)));
15361 Ops.push_back(EmitScalarExpr(E->getArg(2)));
15362 if(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
15363 BuiltinID == PPC::BI__builtin_vsx_stxvll ){
15364 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
15365 }else {
15366 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
15367 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
15368 Ops.pop_back();
15369 }
15370
15371 switch (BuiltinID) {
15372 default: llvm_unreachable("Unsupported st intrinsic!");
15373 case PPC::BI__builtin_altivec_stvx:
15374 ID = Intrinsic::ppc_altivec_stvx;
15375 break;
15376 case PPC::BI__builtin_altivec_stvxl:
15377 ID = Intrinsic::ppc_altivec_stvxl;
15378 break;
15379 case PPC::BI__builtin_altivec_stvebx:
15380 ID = Intrinsic::ppc_altivec_stvebx;
15381 break;
15382 case PPC::BI__builtin_altivec_stvehx:
15383 ID = Intrinsic::ppc_altivec_stvehx;
15384 break;
15385 case PPC::BI__builtin_altivec_stvewx:
15386 ID = Intrinsic::ppc_altivec_stvewx;
15387 break;
15388 case PPC::BI__builtin_vsx_stxvd2x:
15389 ID = Intrinsic::ppc_vsx_stxvd2x;
15390 break;
15391 case PPC::BI__builtin_vsx_stxvw4x:
15392 ID = Intrinsic::ppc_vsx_stxvw4x;
15393 break;
15394 case PPC::BI__builtin_vsx_stxvd2x_be:
15395 ID = Intrinsic::ppc_vsx_stxvd2x_be;
15396 break;
15397 case PPC::BI__builtin_vsx_stxvw4x_be:
15398 ID = Intrinsic::ppc_vsx_stxvw4x_be;
15399 break;
15400 case PPC::BI__builtin_vsx_stxvl:
15401 ID = Intrinsic::ppc_vsx_stxvl;
15402 break;
15403 case PPC::BI__builtin_vsx_stxvll:
15404 ID = Intrinsic::ppc_vsx_stxvll;
15405 break;
15406 }
15407 llvm::Function *F = CGM.getIntrinsic(ID);
15408 return Builder.CreateCall(F, Ops, "");
15409 }
15410 case PPC::BI__builtin_vsx_ldrmb: {
15411 // This essentially boils down to performing an unaligned VMX load sequence
15412 // so as to avoid crossing a page boundary, and then shuffling the elements
15413 // into the right side of the vector register.
15414 Value *Op0 = EmitScalarExpr(E->getArg(0));
15415 Value *Op1 = EmitScalarExpr(E->getArg(1));
15416 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
15417 llvm::Type *ResTy = ConvertType(E->getType());
15418 bool IsLE = getTarget().isLittleEndian();
15419
15420 // If the user wants the entire vector, just load the entire vector.
15421 if (NumBytes == 16) {
15422 Value *BC = Builder.CreateBitCast(Op0, ResTy->getPointerTo());
15423 Value *LD =
15424 Builder.CreateLoad(Address(BC, ResTy, CharUnits::fromQuantity(1)));
15425 if (!IsLE)
15426 return LD;
15427
15428 // Reverse the bytes on LE.
15429 SmallVector<int, 16> RevMask;
15430 for (int Idx = 0; Idx < 16; Idx++)
15431 RevMask.push_back(15 - Idx);
15432 return Builder.CreateShuffleVector(LD, LD, RevMask);
15433 }
15434
15435 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
15436 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
15437 : Intrinsic::ppc_altivec_lvsl);
15438 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
15439 Value *HiMem = Builder.CreateGEP(
15440 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
15441 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
15442 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
15443 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
15444
15445 Op0 = IsLE ? HiLd : LoLd;
15446 Op1 = IsLE ? LoLd : HiLd;
15447 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
15448 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
15449
15450 if (IsLE) {
15451 SmallVector<int, 16> Consts;
15452 for (int Idx = 0; Idx < 16; Idx++) {
15453 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
15454 : 16 - (NumBytes - Idx);
15455 Consts.push_back(Val);
15456 }
15457 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
15458 Zero, Consts);
15459 }
15460 SmallVector<Constant *, 16> Consts;
15461 for (int Idx = 0; Idx < 16; Idx++)
15462 Consts.push_back(Builder.getInt8(NumBytes + Idx));
15463 Value *Mask2 = ConstantVector::get(Consts);
15464 return Builder.CreateBitCast(
15465 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
15466 }
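// To illustrate the sequence above with an assumed call
// __builtin_vsx_ldrmb(p, 7): lvx loads are issued for p and p + 6, and since
// lvx accesses are always 16-byte aligned neither load can cross a page
// boundary; lvsl/lvsr plus vperm then stitch the two halves together, and the
// final shuffle moves the 7 requested bytes into the correct end of the
// register, zero-filling the remainder.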
15467 case PPC::BI__builtin_vsx_strmb: {
15468 Value *Op0 = EmitScalarExpr(E->getArg(0));
15469 Value *Op1 = EmitScalarExpr(E->getArg(1));
15470 Value *Op2 = EmitScalarExpr(E->getArg(2));
15471 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
15472 bool IsLE = getTarget().isLittleEndian();
15473 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
15474 // If storing the whole vector, simply store it on BE; on LE, reverse the
15475 // bytes and then store.
15476 if (Width == 16) {
15477 Value *BC = Builder.CreateBitCast(Op0, Op2->getType()->getPointerTo());
15478 Value *StVec = Op2;
15479 if (IsLE) {
15480 SmallVector<int, 16> RevMask;
15481 for (int Idx = 0; Idx < 16; Idx++)
15482 RevMask.push_back(15 - Idx);
15483 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
15484 }
15485 return Builder.CreateStore(
15486 StVec, Address(BC, Op2->getType(), CharUnits::fromQuantity(1)));
15487 }
15488 auto *ConvTy = Int64Ty;
15489 unsigned NumElts = 0;
15490 switch (Width) {
15491 default:
15492 llvm_unreachable("width for stores must be a power of 2");
15493 case 8:
15494 ConvTy = Int64Ty;
15495 NumElts = 2;
15496 break;
15497 case 4:
15498 ConvTy = Int32Ty;
15499 NumElts = 4;
15500 break;
15501 case 2:
15502 ConvTy = Int16Ty;
15503 NumElts = 8;
15504 break;
15505 case 1:
15506 ConvTy = Int8Ty;
15507 NumElts = 16;
15508 break;
15509 }
15510 Value *Vec = Builder.CreateBitCast(
15511 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
15512 Value *Ptr =
15513 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
15514 Value *PtrBC = Builder.CreateBitCast(Ptr, ConvTy->getPointerTo());
15515 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
15516 if (IsLE && Width > 1) {
15517 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
15518 Elt = Builder.CreateCall(F, Elt);
15519 }
15520 return Builder.CreateStore(
15521 Elt, Address(PtrBC, ConvTy, CharUnits::fromQuantity(1)));
15522 };
15523 unsigned Stored = 0;
15524 unsigned RemainingBytes = NumBytes;
15525 Value *Result;
15526 if (NumBytes == 16)
15527 return StoreSubVec(16, 0, 0);
15528 if (NumBytes >= 8) {
15529 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
15530 RemainingBytes -= 8;
15531 Stored += 8;
15532 }
15533 if (RemainingBytes >= 4) {
15534 Result = StoreSubVec(4, NumBytes - Stored - 4,
15535 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
15536 RemainingBytes -= 4;
15537 Stored += 4;
15538 }
15539 if (RemainingBytes >= 2) {
15540 Result = StoreSubVec(2, NumBytes - Stored - 2,
15541 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
15542 RemainingBytes -= 2;
15543 Stored += 2;
15544 }
15545 if (RemainingBytes)
15546 Result =
15547 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
15548 return Result;
15549 }
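// The chain of conditionals above performs a greedy power-of-two
// decomposition of the byte count. For an assumed call
// __builtin_vsx_strmb(p, 11, v), the emitted stores are one 8-byte, one
// 2-byte, and one 1-byte store (11 = 8 + 2 + 1), each taken from the matching
// element of the source vector and byte-swapped on little-endian targets.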
15550 // Square root
15551 case PPC::BI__builtin_vsx_xvsqrtsp:
15552 case PPC::BI__builtin_vsx_xvsqrtdp: {
15553 llvm::Type *ResultType = ConvertType(E->getType());
15554 Value *X = EmitScalarExpr(E->getArg(0));
15555 if (Builder.getIsFPConstrained()) {
15556 llvm::Function *F = CGM.getIntrinsic(
15557 Intrinsic::experimental_constrained_sqrt, ResultType);
15558 return Builder.CreateConstrainedFPCall(F, X);
15559 } else {
15560 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
15561 return Builder.CreateCall(F, X);
15562 }
15563 }
15564 // Count leading zeros
15565 case PPC::BI__builtin_altivec_vclzb:
15566 case PPC::BI__builtin_altivec_vclzh:
15567 case PPC::BI__builtin_altivec_vclzw:
15568 case PPC::BI__builtin_altivec_vclzd: {
15569 llvm::Type *ResultType = ConvertType(E->getType());
15570 Value *X = EmitScalarExpr(E->getArg(0));
15571 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
15572 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
15573 return Builder.CreateCall(F, {X, Undef});
15574 }
15575 case PPC::BI__builtin_altivec_vctzb:
15576 case PPC::BI__builtin_altivec_vctzh:
15577 case PPC::BI__builtin_altivec_vctzw:
15578 case PPC::BI__builtin_altivec_vctzd: {
15579 llvm::Type *ResultType = ConvertType(E->getType());
15580 Value *X = EmitScalarExpr(E->getArg(0));
15581 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
15582 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
15583 return Builder.CreateCall(F, {X, Undef});
15584 }
15585 case PPC::BI__builtin_altivec_vinsd:
15586 case PPC::BI__builtin_altivec_vinsw:
15587 case PPC::BI__builtin_altivec_vinsd_elt:
15588 case PPC::BI__builtin_altivec_vinsw_elt: {
15589 llvm::Type *ResultType = ConvertType(E->getType());
15590 Value *Op0 = EmitScalarExpr(E->getArg(0));
15591 Value *Op1 = EmitScalarExpr(E->getArg(1));
15592 Value *Op2 = EmitScalarExpr(E->getArg(2));
15593
15594 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
15595 BuiltinID == PPC::BI__builtin_altivec_vinsd);
15596
15597 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
15598 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
15599
15600 // The third argument must be a compile time constant.
15601 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
15602 assert(ArgCI &&
15603 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
15604
15605 // The valid range for the third argument depends on the input type and
15606 // the builtin called.
15607 int ValidMaxValue = 0;
15608 if (IsUnaligned)
15609 ValidMaxValue = (Is32bit) ? 12 : 8;
15610 else
15611 ValidMaxValue = (Is32bit) ? 3 : 1;
15612
15613 // Get value of third argument.
15614 int64_t ConstArg = ArgCI->getSExtValue();
15615
15616 // Compose range checking error message.
15617 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
15618 RangeErrMsg += " number " + llvm::to_string(ConstArg);
15619 RangeErrMsg += " is outside of the valid range [0, ";
15620 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
15621
15622 // Issue error if third argument is not within the valid range.
15623 if (ConstArg < 0 || ConstArg > ValidMaxValue)
15624 CGM.Error(E->getExprLoc(), RangeErrMsg);
15625
15626 // Input to vec_replace_elt is an element index; convert it to a byte index.
15627 if (!IsUnaligned) {
15628 ConstArg *= Is32bit ? 4 : 8;
15629 // Fix the constant according to endianness.
15630 if (getTarget().isLittleEndian())
15631 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
15632 }
15633
15634 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
15635 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
15636 // Casting input to vector int as per intrinsic definition.
15637 Op0 =
15638 Is32bit
15639 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
15640 : Builder.CreateBitCast(Op0,
15641 llvm::FixedVectorType::get(Int64Ty, 2));
15642 return Builder.CreateBitCast(
15643 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
15644 }
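// A worked example of the index fixup above (an assumed call, not from this
// file): vec_replace_elt on 32-bit elements with element index 1 first scales
// to byte index 1 * 4 = 4; on little-endian targets the index is then
// mirrored to 12 - 4 = 8, because vinsw numbers bytes from the big-endian end
// of the register.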
15645 case PPC::BI__builtin_altivec_vpopcntb:
15646 case PPC::BI__builtin_altivec_vpopcnth:
15647 case PPC::BI__builtin_altivec_vpopcntw:
15648 case PPC::BI__builtin_altivec_vpopcntd: {
15649 llvm::Type *ResultType = ConvertType(E->getType());
15650 Value *X = EmitScalarExpr(E->getArg(0));
15651 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
15652 return Builder.CreateCall(F, X);
15653 }
15654 case PPC::BI__builtin_altivec_vadduqm:
15655 case PPC::BI__builtin_altivec_vsubuqm: {
15656 Value *Op0 = EmitScalarExpr(E->getArg(0));
15657 Value *Op1 = EmitScalarExpr(E->getArg(1));
15658 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
15659 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
15660 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
15661 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
15662 return Builder.CreateAdd(Op0, Op1, "vadduqm");
15663 else
15664 return Builder.CreateSub(Op0, Op1, "vsubuqm");
15665 }
15666 case PPC::BI__builtin_altivec_vaddcuq_c:
15667 case PPC::BI__builtin_altivec_vsubcuq_c: {
15668 SmallVector<Value *, 2> Ops;
15669 Value *Op0 = EmitScalarExpr(E->getArg(0));
15670 Value *Op1 = EmitScalarExpr(E->getArg(1));
15671 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
15672 llvm::IntegerType::get(getLLVMContext(), 128), 1);
15673 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
15674 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
15675 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
15676 ? Intrinsic::ppc_altivec_vaddcuq
15677 : Intrinsic::ppc_altivec_vsubcuq;
15678 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
15679 }
15680 case PPC::BI__builtin_altivec_vaddeuqm_c:
15681 case PPC::BI__builtin_altivec_vaddecuq_c:
15682 case PPC::BI__builtin_altivec_vsubeuqm_c:
15683 case PPC::BI__builtin_altivec_vsubecuq_c: {
15684 SmallVector<Value *, 3> Ops;
15685 Value *Op0 = EmitScalarExpr(E->getArg(0));
15686 Value *Op1 = EmitScalarExpr(E->getArg(1));
15687 Value *Op2 = EmitScalarExpr(E->getArg(2));
15688 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
15689 llvm::IntegerType::get(getLLVMContext(), 128), 1);
15690 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
15691 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
15692 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
15693 switch (BuiltinID) {
15694 default:
15695 llvm_unreachable("Unsupported intrinsic!");
15696 case PPC::BI__builtin_altivec_vaddeuqm_c:
15697 ID = Intrinsic::ppc_altivec_vaddeuqm;
15698 break;
15699 case PPC::BI__builtin_altivec_vaddecuq_c:
15700 ID = Intrinsic::ppc_altivec_vaddecuq;
15701 break;
15702 case PPC::BI__builtin_altivec_vsubeuqm_c:
15703 ID = Intrinsic::ppc_altivec_vsubeuqm;
15704 break;
15705 case PPC::BI__builtin_altivec_vsubecuq_c:
15706 ID = Intrinsic::ppc_altivec_vsubecuq;
15707 break;
15708 }
15709 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
15710 }
15711 // Rotate and insert under mask operation.
15712 // __rldimi(rs, is, shift, mask)
15713 // (rotl64(rs, shift) & mask) | (is & ~mask)
15714 // __rlwimi(rs, is, shift, mask)
15715 // (rotl(rs, shift) & mask) | (is & ~mask)
15716 case PPC::BI__builtin_ppc_rldimi:
15717 case PPC::BI__builtin_ppc_rlwimi: {
15718 Value *Op0 = EmitScalarExpr(E->getArg(0));
15719 Value *Op1 = EmitScalarExpr(E->getArg(1));
15720 Value *Op2 = EmitScalarExpr(E->getArg(2));
15721 Value *Op3 = EmitScalarExpr(E->getArg(3));
15722 llvm::Type *Ty = Op0->getType();
15723 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
15724 if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
15725 Op2 = Builder.CreateZExt(Op2, Int64Ty);
15726 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
15727 Value *X = Builder.CreateAnd(Shift, Op3);
15728 Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3));
15729 return Builder.CreateOr(X, Y);
15730 }
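// As a concrete instance of the expansion above, an assumed call
// __builtin_ppc_rlwimi(rs, is, 8, 0x00FFFF00) becomes, in C terms:
//
//   unsigned rot = (rs << 8) | (rs >> 24);  // fshl(rs, rs, 8), a rotate left
//   return (rot & 0x00FFFF00) | (is & ~0x00FFFF00);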
15731 // Rotate and insert under mask operation.
15732 // __rlwnm(rs, shift, mask)
15733 // rotl(rs, shift) & mask
15734 case PPC::BI__builtin_ppc_rlwnm: {
15735 Value *Op0 = EmitScalarExpr(E->getArg(0));
15736 Value *Op1 = EmitScalarExpr(E->getArg(1));
15737 Value *Op2 = EmitScalarExpr(E->getArg(2));
15738 llvm::Type *Ty = Op0->getType();
15739 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
15740 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1});
15741 return Builder.CreateAnd(Shift, Op2);
15742 }
15743 case PPC::BI__builtin_ppc_poppar4:
15744 case PPC::BI__builtin_ppc_poppar8: {
15745 Value *Op0 = EmitScalarExpr(E->getArg(0));
15746 llvm::Type *ArgType = Op0->getType();
15747 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
15748 Value *Tmp = Builder.CreateCall(F, Op0);
15749
15750 llvm::Type *ResultType = ConvertType(E->getType());
15751 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
15752 if (Result->getType() != ResultType)
15753 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
15754 "cast");
15755 return Result;
15756 }
15757 case PPC::BI__builtin_ppc_cmpb: {
15758 Value *Op0 = EmitScalarExpr(E->getArg(0));
15759 Value *Op1 = EmitScalarExpr(E->getArg(1));
15760 if (getTarget().getTriple().isPPC64()) {
15761 Function *F =
15762 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
15763 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
15764 }
15765 // For 32 bit, emit the code as below:
15766 // %conv = trunc i64 %a to i32
15767 // %conv1 = trunc i64 %b to i32
15768 // %shr = lshr i64 %a, 32
15769 // %conv2 = trunc i64 %shr to i32
15770 // %shr3 = lshr i64 %b, 32
15771 // %conv4 = trunc i64 %shr3 to i32
15772 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
15773 // %conv5 = zext i32 %0 to i64
15774 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
15775 // %conv614 = zext i32 %1 to i64
15776 // %shl = shl nuw i64 %conv614, 32
15777 // %or = or i64 %shl, %conv5
15778 // ret i64 %or
15779 Function *F =
15780 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
15781 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
15782 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
15783 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
15784 Value *ArgOneHi =
15785 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
15786 Value *ArgTwoHi =
15787 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
15788 Value *ResLo = Builder.CreateZExt(
15789 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
15790 Value *ResHiShift = Builder.CreateZExt(
15791 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
15792 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
15793 return Builder.CreateOr(ResLo, ResHi);
15794 }
15795 // Copy sign
15796 case PPC::BI__builtin_vsx_xvcpsgnsp:
15797 case PPC::BI__builtin_vsx_xvcpsgndp: {
15798 llvm::Type *ResultType = ConvertType(E->getType());
15799 Value *X = EmitScalarExpr(E->getArg(0));
15800 Value *Y = EmitScalarExpr(E->getArg(1));
15801 ID = Intrinsic::copysign;
15802 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
15803 return Builder.CreateCall(F, {X, Y});
15804 }
15805 // Rounding/truncation
15806 case PPC::BI__builtin_vsx_xvrspip:
15807 case PPC::BI__builtin_vsx_xvrdpip:
15808 case PPC::BI__builtin_vsx_xvrdpim:
15809 case PPC::BI__builtin_vsx_xvrspim:
15810 case PPC::BI__builtin_vsx_xvrdpi:
15811 case PPC::BI__builtin_vsx_xvrspi:
15812 case PPC::BI__builtin_vsx_xvrdpic:
15813 case PPC::BI__builtin_vsx_xvrspic:
15814 case PPC::BI__builtin_vsx_xvrdpiz:
15815 case PPC::BI__builtin_vsx_xvrspiz: {
15816 llvm::Type *ResultType = ConvertType(E->getType());
15817 Value *X = EmitScalarExpr(E->getArg(0));
15818 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
15819 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
15820 ID = Builder.getIsFPConstrained()
15821 ? Intrinsic::experimental_constrained_floor
15822 : Intrinsic::floor;
15823 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
15824 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
15825 ID = Builder.getIsFPConstrained()
15826 ? Intrinsic::experimental_constrained_round
15827 : Intrinsic::round;
15828 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
15829 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
15830 ID = Builder.getIsFPConstrained()
15831 ? Intrinsic::experimental_constrained_rint
15832 : Intrinsic::rint;
15833 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
15834 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
15835 ID = Builder.getIsFPConstrained()
15836 ? Intrinsic::experimental_constrained_ceil
15837 : Intrinsic::ceil;
15838 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
15839 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
15840 ID = Builder.getIsFPConstrained()
15841 ? Intrinsic::experimental_constrained_trunc
15842 : Intrinsic::trunc;
15843 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
15844 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
15845 : Builder.CreateCall(F, X);
15846 }
15847
15848 // Absolute value
15849 case PPC::BI__builtin_vsx_xvabsdp:
15850 case PPC::BI__builtin_vsx_xvabssp: {
15851 llvm::Type *ResultType = ConvertType(E->getType());
15852 Value *X = EmitScalarExpr(E->getArg(0));
15853 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
15854 return Builder.CreateCall(F, X);
15855 }
15856
15857 // Fastmath by default
15858 case PPC::BI__builtin_ppc_recipdivf:
15859 case PPC::BI__builtin_ppc_recipdivd:
15860 case PPC::BI__builtin_ppc_rsqrtf:
15861 case PPC::BI__builtin_ppc_rsqrtd: {
15862 FastMathFlags FMF = Builder.getFastMathFlags();
15863 Builder.getFastMathFlags().setFast();
15864 llvm::Type *ResultType = ConvertType(E->getType());
15865 Value *X = EmitScalarExpr(E->getArg(0));
15866
15867 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
15868 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
15869 Value *Y = EmitScalarExpr(E->getArg(1));
15870 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
15871 Builder.getFastMathFlags() &= (FMF);
15872 return FDiv;
15873 }
15874 auto *One = ConstantFP::get(ResultType, 1.0);
15875 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
15876 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
15877 Builder.getFastMathFlags() &= (FMF);
15878 return FDiv;
15879 }
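// Note the save/restore pattern above: Builder.getFastMathFlags() yields a
// mutable reference, so setFast() enables all fast-math flags only for the
// emitted operations, and the saved copy is and-ed back in afterwards. For
// __builtin_ppc_rsqrtf this yields, schematically:
//
//   %s = call fast float @llvm.sqrt.f32(float %x)
//   %r = fdiv fast float 1.0, %s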
15880 case PPC::BI__builtin_ppc_alignx: {
15881 Value *Op0 = EmitScalarExpr(E->getArg(0));
15882 Value *Op1 = EmitScalarExpr(E->getArg(1));
15883 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
15884 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
15885 AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
15886 llvm::Value::MaximumAlignment);
15887
15888 emitAlignmentAssumption(Op1, E->getArg(1),
15889 /*The expr loc is sufficient.*/ SourceLocation(),
15890 AlignmentCI, nullptr);
15891 return Op1;
15892 }
15893 case PPC::BI__builtin_ppc_rdlam: {
15894 Value *Op0 = EmitScalarExpr(E->getArg(0));
15895 Value *Op1 = EmitScalarExpr(E->getArg(1));
15896 Value *Op2 = EmitScalarExpr(E->getArg(2));
15897 llvm::Type *Ty = Op0->getType();
15898 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
15899 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
15900 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
15901 return Builder.CreateAnd(Rotate, Op2);
15902 }
15903 case PPC::BI__builtin_ppc_load2r: {
15904 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
15905 Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
15906 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
15907 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
15908 }
15909 // FMA variations
15910 case PPC::BI__builtin_ppc_fnmsub:
15911 case PPC::BI__builtin_ppc_fnmsubs:
15912 case PPC::BI__builtin_vsx_xvmaddadp:
15913 case PPC::BI__builtin_vsx_xvmaddasp:
15914 case PPC::BI__builtin_vsx_xvnmaddadp:
15915 case PPC::BI__builtin_vsx_xvnmaddasp:
15916 case PPC::BI__builtin_vsx_xvmsubadp:
15917 case PPC::BI__builtin_vsx_xvmsubasp:
15918 case PPC::BI__builtin_vsx_xvnmsubadp:
15919 case PPC::BI__builtin_vsx_xvnmsubasp: {
15920 llvm::Type *ResultType = ConvertType(E->getType());
15921 Value *X = EmitScalarExpr(E->getArg(0));
15922 Value *Y = EmitScalarExpr(E->getArg(1));
15923 Value *Z = EmitScalarExpr(E->getArg(2));
15924 llvm::Function *F;
15925 if (Builder.getIsFPConstrained())
15926 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
15927 else
15928 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
15929 switch (BuiltinID) {
15930 case PPC::BI__builtin_vsx_xvmaddadp:
15931 case PPC::BI__builtin_vsx_xvmaddasp:
15932 if (Builder.getIsFPConstrained())
15933 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
15934 else
15935 return Builder.CreateCall(F, {X, Y, Z});
15936 case PPC::BI__builtin_vsx_xvnmaddadp:
15937 case PPC::BI__builtin_vsx_xvnmaddasp:
15938 if (Builder.getIsFPConstrained())
15939 return Builder.CreateFNeg(
15940 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
15941 else
15942 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
15943 case PPC::BI__builtin_vsx_xvmsubadp:
15944 case PPC::BI__builtin_vsx_xvmsubasp:
15945 if (Builder.getIsFPConstrained())
15946 return Builder.CreateConstrainedFPCall(
15947 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
15948 else
15949 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
15950 case PPC::BI__builtin_ppc_fnmsub:
15951 case PPC::BI__builtin_ppc_fnmsubs:
15952 case PPC::BI__builtin_vsx_xvnmsubadp:
15953 case PPC::BI__builtin_vsx_xvnmsubasp:
15954 if (Builder.getIsFPConstrained())
15955 return Builder.CreateFNeg(
15956 Builder.CreateConstrainedFPCall(
15957 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
15958 "neg");
15959 else
15960 return Builder.CreateCall(
15961 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
15962 }
15963 llvm_unreachable("Unknown FMA operation");
15964 return nullptr; // Suppress no-return warning
15965 }
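// Summarizing the sign placement above (a sketch, with fma(X, Y, Z) meaning
// X * Y + Z):
//
//   xvmaddadp/asp   ->  fma(X, Y, Z)
//   xvnmaddadp/asp  -> -fma(X, Y, Z)
//   xvmsubadp/asp   ->  fma(X, Y, -Z)
//   xvnmsubadp/asp  -> -fma(X, Y, -Z)  (ppc_fnmsub in the unconstrained case)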
15966
15967 case PPC::BI__builtin_vsx_insertword: {
15968 Value *Op0 = EmitScalarExpr(E->getArg(0));
15969 Value *Op1 = EmitScalarExpr(E->getArg(1));
15970 Value *Op2 = EmitScalarExpr(E->getArg(2));
15971 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
15972
15973 // Third argument is a compile time constant int. It must be clamped
15974 // to the range [0, 12].
15975 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
15976 assert(ArgCI &&
15977 "Third arg to xxinsertw intrinsic must be constant integer");
15978 const int64_t MaxIndex = 12;
15979 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
15980
15981 // The builtin semantics don't exactly match the xxinsertw instruction's
15982 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
15983 // word from the first argument, and inserts it in the second argument. The
15984 // instruction extracts the word from its second input register and inserts
15985 // it into its first input register, so swap the first and second arguments.
15986 std::swap(Op0, Op1);
15987
15988 // Need to cast the second argument from a vector of unsigned int to a
15989 // vector of long long.
15990 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
15991
15992 if (getTarget().isLittleEndian()) {
15993 // Reverse the double words in the vector we will extract from.
15994 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
15995 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
15996
15997 // Reverse the index.
15998 Index = MaxIndex - Index;
15999 }
16000
16001 // Intrinsic expects the first arg to be a vector of int.
16002 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
16003 Op2 = ConstantInt::getSigned(Int32Ty, Index);
16004 return Builder.CreateCall(F, {Op0, Op1, Op2});
16005 }
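// For an assumed little-endian call __builtin_vsx_insertword(v, w, 4): the
// two vector operands are swapped to match the instruction's register order,
// the doublewords of the extract source are reversed, and the byte index is
// mirrored to 12 - 4 = 8, so xxinsertw ends up writing the word the caller
// named.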
16006
16007 case PPC::BI__builtin_vsx_extractuword: {
16008 Value *Op0 = EmitScalarExpr(E->getArg(0));
16009 Value *Op1 = EmitScalarExpr(E->getArg(1));
16010 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
16011
16012 // Intrinsic expects the first argument to be a vector of doublewords.
16013 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
16014
16015 // The second argument is a compile time constant int that needs to
16016 // be clamped to the range [0, 12].
16017 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
16018 assert(ArgCI &&
16019 "Second Arg to xxextractuw intrinsic must be a constant integer!");
16020 const int64_t MaxIndex = 12;
16021 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
16022
16023 if (getTarget().isLittleEndian()) {
16024 // Reverse the index.
16025 Index = MaxIndex - Index;
16026 Op1 = ConstantInt::getSigned(Int32Ty, Index);
16027
16028 // Emit the call, then reverse the double words of the results vector.
16029 Value *Call = Builder.CreateCall(F, {Op0, Op1});
16030
16031 Value *ShuffleCall =
16032 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
16033 return ShuffleCall;
16034 } else {
16035 Op1 = ConstantInt::getSigned(Int32Ty, Index);
16036 return Builder.CreateCall(F, {Op0, Op1});
16037 }
16038 }
16039
16040 case PPC::BI__builtin_vsx_xxpermdi: {
16041 Value *Op0 = EmitScalarExpr(E->getArg(0));
16042 Value *Op1 = EmitScalarExpr(E->getArg(1));
16043 Value *Op2 = EmitScalarExpr(E->getArg(2));
16044 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
16045 assert(ArgCI && "Third arg must be constant integer!");
16046
16047 unsigned Index = ArgCI->getZExtValue();
16048 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
16049 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
16050
16051 // Account for endianness by treating this as just a shuffle. So we use the
16052 // same indices for both LE and BE in order to produce expected results in
16053 // both cases.
16054 int ElemIdx0 = (Index & 2) >> 1;
16055 int ElemIdx1 = 2 + (Index & 1);
16056
16057 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
16058 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
16059 QualType BIRetType = E->getType();
16060 auto RetTy = ConvertType(BIRetType);
16061 return Builder.CreateBitCast(ShuffleCall, RetTy);
16062 }
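// Decoding the 2-bit control above, as a quick example: an assumed call
// __builtin_vsx_xxpermdi(a, b, 3) gives ElemIdx0 = (3 & 2) >> 1 = 1 and
// ElemIdx1 = 2 + (3 & 1) = 3, so the shuffle selects doubleword 1 of a and
// doubleword 1 of b from the concatenated v2i64 inputs, on either endianness.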
16063
16064 case PPC::BI__builtin_vsx_xxsldwi: {
16065 Value *Op0 = EmitScalarExpr(E->getArg(0));
16066 Value *Op1 = EmitScalarExpr(E->getArg(1));
16067 Value *Op2 = EmitScalarExpr(E->getArg(2));
16068 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
16069 assert(ArgCI && "Third argument must be a compile time constant");
16070 unsigned Index = ArgCI->getZExtValue() & 0x3;
16071 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
16072 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
16073
16074 // Create a shuffle mask
16075 int ElemIdx0;
16076 int ElemIdx1;
16077 int ElemIdx2;
16078 int ElemIdx3;
16079 if (getTarget().isLittleEndian()) {
16080 // Little endian element N comes from element 8+N-Index of the
16081 // concatenated wide vector (of course, using modulo arithmetic on
16082 // the total number of elements).
16083 ElemIdx0 = (8 - Index) % 8;
16084 ElemIdx1 = (9 - Index) % 8;
16085 ElemIdx2 = (10 - Index) % 8;
16086 ElemIdx3 = (11 - Index) % 8;
16087 } else {
16088 // Big endian ElemIdx<N> = Index + N
16089 ElemIdx0 = Index;
16090 ElemIdx1 = Index + 1;
16091 ElemIdx2 = Index + 2;
16092 ElemIdx3 = Index + 3;
16093 }
16094
16095 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
16096 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
16097 QualType BIRetType = E->getType();
16098 auto RetTy = ConvertType(BIRetType);
16099 return Builder.CreateBitCast(ShuffleCall, RetTy);
16100 }
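// An example of the mask computation above for an assumed shift count of 1:
// big-endian produces shuffle indices {1, 2, 3, 4}, while the little-endian
// formula (8 + N - Index) % 8 produces {7, 0, 1, 2}, selecting the same data
// from the concatenated 8-word vector once LE element order is taken into
// account.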
16101
16102 case PPC::BI__builtin_pack_vector_int128: {
16103 Value *Op0 = EmitScalarExpr(E->getArg(0));
16104 Value *Op1 = EmitScalarExpr(E->getArg(1));
16105 bool isLittleEndian = getTarget().isLittleEndian();
16106 Value *UndefValue =
16107 llvm::UndefValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
16108 Value *Res = Builder.CreateInsertElement(
16109 UndefValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
16110 Res = Builder.CreateInsertElement(Res, Op1,
16111 (uint64_t)(isLittleEndian ? 0 : 1));
16112 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
16113 }
16114
16115 case PPC::BI__builtin_unpack_vector_int128: {
16116 Value *Op0 = EmitScalarExpr(E->getArg(0));
16117 Value *Op1 = EmitScalarExpr(E->getArg(1));
16118 ConstantInt *Index = cast<ConstantInt>(Op1);
16119 Value *Unpacked = Builder.CreateBitCast(
16120 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
16121
16122 if (getTarget().isLittleEndian())
16123 Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
16124
16125 return Builder.CreateExtractElement(Unpacked, Index);
16126 }
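// The two cases above are inverses of each other. As a sketch: on
// little-endian, __builtin_pack_vector_int128(a, b) inserts a into lane 1 and
// b into lane 0 before bitcasting to the 128-bit result, while
// __builtin_unpack_vector_int128(v, idx) flips idx to 1 - idx, so the same
// source-level index selects the same half on either endianness.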
16127
16128 case PPC::BI__builtin_ppc_sthcx: {
16129 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
16130 Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
16131 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
16132 return Builder.CreateCall(F, {Op0, Op1});
16133 }
16134
16135 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
16136 // Some of the MMA instructions accumulate their result into an existing
16137 // accumulator whereas the others generate a new accumulator. So we need to
16138 // use custom code generation to expand a builtin call with a pointer to a
16139 // load (if the corresponding instruction accumulates its result) followed by
16140 // the call to the intrinsic and a store of the result.
16141#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate) \
16142 case PPC::BI__builtin_##Name:
16143#include "clang/Basic/BuiltinsPPC.def"
16144 {
16145 SmallVector<Value *, 4> Ops;
16146 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
16147 if (E->getArg(i)->getType()->isArrayType())
16148 Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer());
16149 else
16150 Ops.push_back(EmitScalarExpr(E->getArg(i)));
16151 // The first argument of these builtins is a pointer used to store their
16152 // result. However, the llvm intrinsics return their result in multiple
16153 // return values. So, here we emit code extracting these values from the
16154 // intrinsic results and storing them using that pointer.
16155 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
16156 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
16157 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
16158 unsigned NumVecs = 2;
16159 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
16160 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
16161 NumVecs = 4;
16162 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
16163 }
16164 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
16165 Address Addr = EmitPointerWithAlignment(E->getArg(1));
16166 Value *Vec = Builder.CreateLoad(Addr);
16167 Value *Call = Builder.CreateCall(F, {Vec});
16168 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
16169 Value *Ptr = Builder.CreateBitCast(Ops[0], VTy->getPointerTo());
16170 for (unsigned i=0; i<NumVecs; i++) {
16171 Value *Vec = Builder.CreateExtractValue(Call, i);
16172 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
16173 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
16174 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
16175 }
16176 return Call;
16177 }
16178 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
16179 BuiltinID == PPC::BI__builtin_mma_build_acc) {
16180 // Reverse the order of the operands for LE, so the
16181 // same builtin call can be used on both LE and BE
16182 // without the need for the programmer to swap operands.
16183 // The operands are reversed starting from the second argument;
16184 // the first operand is the pointer to the pair/accumulator
16185 // that is being built.
16186 if (getTarget().isLittleEndian())
16187 std::reverse(Ops.begin() + 1, Ops.end());
16188 }
16189 bool Accumulate;
16190 switch (BuiltinID) {
16191 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
16192 case PPC::BI__builtin_##Name: \
16193 ID = Intrinsic::ppc_##Intr; \
16194 Accumulate = Acc; \
16195 break;
16196 #include "clang/Basic/BuiltinsPPC.def"
16197 }
16198 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
16199 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
16200 BuiltinID == PPC::BI__builtin_mma_lxvp ||
16201 BuiltinID == PPC::BI__builtin_mma_stxvp) {
16202 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
16203 BuiltinID == PPC::BI__builtin_mma_lxvp) {
16204 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
16205 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
16206 } else {
16207 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
16208 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
16209 }
16210 Ops.pop_back();
16211 llvm::Function *F = CGM.getIntrinsic(ID);
16212 return Builder.CreateCall(F, Ops, "");
16213 }
16214 SmallVector<Value*, 4> CallOps;
16215 if (Accumulate) {
16216 Address Addr = EmitPointerWithAlignment(E->getArg(0));
16217 Value *Acc = Builder.CreateLoad(Addr);
16218 CallOps.push_back(Acc);
16219 }
16220 for (unsigned i=1; i<Ops.size(); i++)
16221 CallOps.push_back(Ops[i]);
16222 llvm::Function *F = CGM.getIntrinsic(ID);
16223 Value *Call = Builder.CreateCall(F, CallOps);
16224 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
16225 }
16226
16227 case PPC::BI__builtin_ppc_compare_and_swap:
16228 case PPC::BI__builtin_ppc_compare_and_swaplp: {
16229 Address Addr = EmitPointerWithAlignment(E->getArg(0));
16230 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
16231 Value *OldVal = Builder.CreateLoad(OldValAddr);
16232 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
16233 LValue LV = MakeAddrLValue(Addr, AtomicTy);
16234 Value *Op2 = EmitScalarExpr(E->getArg(2));
16235 auto Pair = EmitAtomicCompareExchange(
16236 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
16237 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
16238 // Unlike C11's atomic_compare_exchange, according to
16239 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
16240 // > In either case, the contents of the memory location specified by addr
16241 // > are copied into the memory location specified by old_val_addr.
16242 // But it does not specify whether the store to OldValAddr is atomic or
16243 // which ordering to use. Following XL's codegen, treat it as a normal
16244 // store.
16245 Value *LoadedVal = Pair.first.getScalarVal();
16246 Builder.CreateStore(LoadedVal, OldValAddr);
16247 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
16248 }
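// A usage sketch of the semantics implemented above (an assumed example):
//
//   int old = expected;
//   if (__compare_and_swap(&mem, &old, desired)) { /* mem was updated */ }
//   // Either way, old now holds the value that was read from mem.
//
// The weak, monotonic cmpxchg plus the plain store to old_val_addr mirror
// XL's lowering of these builtins.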
16249 case PPC::BI__builtin_ppc_fetch_and_add:
16250 case PPC::BI__builtin_ppc_fetch_and_addlp: {
16251 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
16252 llvm::AtomicOrdering::Monotonic);
16253 }
16254 case PPC::BI__builtin_ppc_fetch_and_and:
16255 case PPC::BI__builtin_ppc_fetch_and_andlp: {
16256 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
16257 llvm::AtomicOrdering::Monotonic);
16258 }
16259
16260 case PPC::BI__builtin_ppc_fetch_and_or:
16261 case PPC::BI__builtin_ppc_fetch_and_orlp: {
16262 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
16263 llvm::AtomicOrdering::Monotonic);
16264 }
16265 case PPC::BI__builtin_ppc_fetch_and_swap:
16266 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
16267 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
16268 llvm::AtomicOrdering::Monotonic);
16269 }
16270 case PPC::BI__builtin_ppc_ldarx:
16271 case PPC::BI__builtin_ppc_lwarx:
16272 case PPC::BI__builtin_ppc_lharx:
16273 case PPC::BI__builtin_ppc_lbarx:
16274 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
16275 case PPC::BI__builtin_ppc_mfspr: {
16276 Value *Op0 = EmitScalarExpr(E->getArg(0));
16277 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
16278 ? Int32Ty
16279 : Int64Ty;
16280 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
16281 return Builder.CreateCall(F, {Op0});
16282 }
16283 case PPC::BI__builtin_ppc_mtspr: {
16284 Value *Op0 = EmitScalarExpr(E->getArg(0));
16285 Value *Op1 = EmitScalarExpr(E->getArg(1));
16286 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
16287 ? Int32Ty
16288 : Int64Ty;
16289 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
16290 return Builder.CreateCall(F, {Op0, Op1});
16291 }
16292 case PPC::BI__builtin_ppc_popcntb: {
16293 Value *ArgValue = EmitScalarExpr(E->getArg(0));
16294 llvm::Type *ArgType = ArgValue->getType();
16295 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
16296 return Builder.CreateCall(F, {ArgValue}, "popcntb");
16297 }
16298 case PPC::BI__builtin_ppc_mtfsf: {
16299 // The builtin takes a uint32 that needs to be cast to an
16300 // f64 to be passed to the intrinsic.
16301 Value *Op0 = EmitScalarExpr(E->getArg(0));
16302 Value *Op1 = EmitScalarExpr(E->getArg(1));
16303 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
16304 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
16305 return Builder.CreateCall(F, {Op0, Cast}, "");
16306 }
16307
16308 case PPC::BI__builtin_ppc_swdiv_nochk:
16309 case PPC::BI__builtin_ppc_swdivs_nochk: {
16310 Value *Op0 = EmitScalarExpr(E->getArg(0));
16311 Value *Op1 = EmitScalarExpr(E->getArg(1));
16312 FastMathFlags FMF = Builder.getFastMathFlags();
16313 Builder.getFastMathFlags().setFast();
16314 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
16315 Builder.getFastMathFlags() &= (FMF);
16316 return FDiv;
16317 }
16318 case PPC::BI__builtin_ppc_fric:
16319 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
16320 *this, E, Intrinsic::rint,
16321 Intrinsic::experimental_constrained_rint))
16322 .getScalarVal();
16323 case PPC::BI__builtin_ppc_frim:
16324 case PPC::BI__builtin_ppc_frims:
16325 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
16326 *this, E, Intrinsic::floor,
16327 Intrinsic::experimental_constrained_floor))
16328 .getScalarVal();
16329 case PPC::BI__builtin_ppc_frin:
16330 case PPC::BI__builtin_ppc_frins:
16331 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
16332 *this, E, Intrinsic::round,
16333 Intrinsic::experimental_constrained_round))
16334 .getScalarVal();
16335 case PPC::BI__builtin_ppc_frip:
16336 case PPC::BI__builtin_ppc_frips:
16337 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
16338 *this, E, Intrinsic::ceil,
16339 Intrinsic::experimental_constrained_ceil))
16340 .getScalarVal();
16341 case PPC::BI__builtin_ppc_friz:
16342 case PPC::BI__builtin_ppc_frizs:
16343 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
16344 *this, E, Intrinsic::trunc,
16345 Intrinsic::experimental_constrained_trunc))
16346 .getScalarVal();
16347 case PPC::BI__builtin_ppc_fsqrt:
16348 case PPC::BI__builtin_ppc_fsqrts:
16349 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
16350 *this, E, Intrinsic::sqrt,
16351 Intrinsic::experimental_constrained_sqrt))
16352 .getScalarVal();
16353 case PPC::BI__builtin_ppc_test_data_class: {
16354 Value *Op0 = EmitScalarExpr(E->getArg(0));
16355 Value *Op1 = EmitScalarExpr(E->getArg(1));
16356 llvm::Type *ArgType = Op0->getType();
16357 unsigned IntrinsicID;
16358 if (ArgType->isDoubleTy())
16359 IntrinsicID = Intrinsic::ppc_test_data_class_d;
16360 else if (ArgType->isFloatTy())
16361 IntrinsicID = Intrinsic::ppc_test_data_class_f;
16362 else
16363 llvm_unreachable("Invalid Argument Type");
16364 return Builder.CreateCall(CGM.getIntrinsic(IntrinsicID), {Op0, Op1},
16365 "test_data_class");
16366 }
16367 case PPC::BI__builtin_ppc_maxfe: {
16368 Value *Op0 = EmitScalarExpr(E->getArg(0));
16369 Value *Op1 = EmitScalarExpr(E->getArg(1));
16370 Value *Op2 = EmitScalarExpr(E->getArg(2));
16371 Value *Op3 = EmitScalarExpr(E->getArg(3));
16372 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
16373 {Op0, Op1, Op2, Op3});
16374 }
16375 case PPC::BI__builtin_ppc_maxfl: {
16376 Value *Op0 = EmitScalarExpr(E->getArg(0));
16377 Value *Op1 = EmitScalarExpr(E->getArg(1));
16378 Value *Op2 = EmitScalarExpr(E->getArg(2));
16379 Value *Op3 = EmitScalarExpr(E->getArg(3));
16380 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
16381 {Op0, Op1, Op2, Op3});
16382 }
16383 case PPC::BI__builtin_ppc_maxfs: {
16384 Value *Op0 = EmitScalarExpr(E->getArg(0));
16385 Value *Op1 = EmitScalarExpr(E->getArg(1));
16386 Value *Op2 = EmitScalarExpr(E->getArg(2));
16387 Value *Op3 = EmitScalarExpr(E->getArg(3));
16388 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
16389 {Op0, Op1, Op2, Op3});
16390 }
16391 case PPC::BI__builtin_ppc_minfe: {
16392 Value *Op0 = EmitScalarExpr(E->getArg(0));
16393 Value *Op1 = EmitScalarExpr(E->getArg(1));
16394 Value *Op2 = EmitScalarExpr(E->getArg(2));
16395 Value *Op3 = EmitScalarExpr(E->getArg(3));
16396 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
16397 {Op0, Op1, Op2, Op3});
16398 }
16399 case PPC::BI__builtin_ppc_minfl: {
16400 Value *Op0 = EmitScalarExpr(E->getArg(0));
16401 Value *Op1 = EmitScalarExpr(E->getArg(1));
16402 Value *Op2 = EmitScalarExpr(E->getArg(2));
16403 Value *Op3 = EmitScalarExpr(E->getArg(3));
16404 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
16405 {Op0, Op1, Op2, Op3});
16406 }
16407 case PPC::BI__builtin_ppc_minfs: {
16408 Value *Op0 = EmitScalarExpr(E->getArg(0));
16409 Value *Op1 = EmitScalarExpr(E->getArg(1));
16410 Value *Op2 = EmitScalarExpr(E->getArg(2));
16411 Value *Op3 = EmitScalarExpr(E->getArg(3));
16412 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
16413 {Op0, Op1, Op2, Op3});
16414 }
16415 case PPC::BI__builtin_ppc_swdiv:
16416 case PPC::BI__builtin_ppc_swdivs: {
16417 Value *Op0 = EmitScalarExpr(E->getArg(0));
16418 Value *Op1 = EmitScalarExpr(E->getArg(1));
16419 return Builder.CreateFDiv(Op0, Op1, "swdiv");
16420 }
16421 }
16422}
16423
16424namespace {
16425 // If \p E is not a null pointer, insert an address space cast to match the
16426 // return type of \p E if necessary.
16427Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
16428 const CallExpr *E = nullptr) {
16429 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
16430 auto *Call = CGF.Builder.CreateCall(F);
16431 Call->addRetAttr(
16432 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
16433 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
16434 if (!E)
16435 return Call;
16436 QualType BuiltinRetType = E->getType();
16437 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
16438 if (RetTy == Call->getType())
16439 return Call;
16440 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
16441}
16442
16443Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
16444 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
16445 auto *Call = CGF.Builder.CreateCall(F);
16446 Call->addRetAttr(
16447 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
16448 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
16449 return Call;
16450}
16451
16452 // \p Index is 0, 1, or 2 for the x, y, or z dimension, respectively.
16453Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
16454 bool IsCOV_5 = CGF.getTarget().getTargetOpts().CodeObjectVersion ==
16455 clang::TargetOptions::COV_5;
16456 Constant *Offset;
16457 Value *DP;
16458 if (IsCOV_5) {
16459 // Indexing the implicit kernarg segment.
16460 Offset = llvm::ConstantInt::get(CGF.Int32Ty, 12 + Index * 2);
16461 DP = EmitAMDGPUImplicitArgPtr(CGF);
16462 } else {
16463 // Indexing the HSA kernel_dispatch_packet struct.
16464 Offset = llvm::ConstantInt::get(CGF.Int32Ty, 4 + Index * 2);
16465 DP = EmitAMDGPUDispatchPtr(CGF);
16466 }
16467
16468 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
16469 auto *DstTy =
16470 CGF.Int16Ty->getPointerTo(GEP->getType()->getPointerAddressSpace());
16471 auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy);
16472 auto *LD = CGF.Builder.CreateLoad(
16473 Address(Cast, CGF.Int16Ty, CharUnits::fromQuantity(2)));
16474 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
16475 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
16476 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
16477 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
16478 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
16479 llvm::MDNode::get(CGF.getLLVMContext(), None));
16480 return LD;
16481}
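// Layout assumed by the offsets above: with code object v5 the work-group
// sizes are three consecutive i16 fields at the start of the implicit kernarg
// segment, so dimension Index is loaded from byte 12 + Index * 2; with older
// code objects they come from the HSA kernel_dispatch_packet, whose
// workgroup_size_x field begins at byte offset 4, hence 4 + Index * 2.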
16482
16483 // \p Index is 0, 1, or 2 for the x, y, or z dimension, respectively.
16484Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
16485 const unsigned XOffset = 12;
16486 auto *DP = EmitAMDGPUDispatchPtr(CGF);
16487 // Indexing the HSA kernel_dispatch_packet struct.
16488 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
16489 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
16490 auto *DstTy =
16491 CGF.Int32Ty->getPointerTo(GEP->getType()->getPointerAddressSpace());
16492 auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy);
16493 auto *LD = CGF.Builder.CreateLoad(
16494 Address(Cast, CGF.Int32Ty, CharUnits::fromQuantity(4)));
16495 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
16496 llvm::MDNode::get(CGF.getLLVMContext(), None));
16497 return LD;
16498}
16499} // namespace
16500
16501// For processing memory ordering and memory scope arguments of various
16502// amdgcn builtins.
16503 // \p Order takes a C++11 compatible memory-ordering specifier and converts
16504 // it into LLVM's memory ordering specifier using the atomic C ABI, and writes
16505 // it to \p AO. \p Scope takes a const char * and converts it into an AMDGCN
16506 // specific SyncScopeID and writes it to \p SSID.
16507 bool CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
16508 llvm::AtomicOrdering &AO,
16509 llvm::SyncScope::ID &SSID) {
16510 if (isa<llvm::ConstantInt>(Order)) {
16511 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
16512
16513 // Map C11/C++11 memory ordering to LLVM memory ordering
16514 assert(llvm::isValidAtomicOrderingCABI(ord));
16515 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
16516 case llvm::AtomicOrderingCABI::acquire:
16517 case llvm::AtomicOrderingCABI::consume:
16518 AO = llvm::AtomicOrdering::Acquire;
16519 break;
16520 case llvm::AtomicOrderingCABI::release:
16521 AO = llvm::AtomicOrdering::Release;
16522 break;
16523 case llvm::AtomicOrderingCABI::acq_rel:
16524 AO = llvm::AtomicOrdering::AcquireRelease;
16525 break;
16526 case llvm::AtomicOrderingCABI::seq_cst:
16527 AO = llvm::AtomicOrdering::SequentiallyConsistent;
16528 break;
16529 case llvm::AtomicOrderingCABI::relaxed:
16530 AO = llvm::AtomicOrdering::Monotonic;
16531 break;
16532 }
16533
16534 StringRef scp;
16535 llvm::getConstantStringInfo(Scope, scp);
16536 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
16537 return true;
16538 }
16539 return false;
16540}
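// Illustrative usage (editorial): for a call such as
//   __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
// the constant-folded first argument maps to llvm::AtomicOrdering::Acquire and
// the string literal becomes the "workgroup" sync-scope ID, so the builtin
// handled below emits approximately:
//   fence syncscope("workgroup") acquire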
16541
16542Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
16543 const CallExpr *E) {
16544 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
16545 llvm::SyncScope::ID SSID;
16546 switch (BuiltinID) {
16547 case AMDGPU::BI__builtin_amdgcn_div_scale:
16548 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
16549 // Translate from the intrinsic's struct return to the builtin's out
16550 // argument.
16551
16552 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
16553
16554 llvm::Value *X = EmitScalarExpr(E->getArg(0));
16555 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
16556 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
16557
16558 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
16559 X->getType());
16560
16561 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
16562
16563 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
16564 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
16565
16566 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
16567
16568 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
16569 Builder.CreateStore(FlagExt, FlagOutPtr);
16570 return Result;
16571 }
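// Sketch of the translation above (editorial): a source-level call
//   bool flag; float r = __builtin_amdgcn_div_scalef(x, y, sel, &flag);
// lowers to approximately
//   %pair = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %x, float %y, i1 %sel)
//   %r    = extractvalue { float, i1 } %pair, 0
//   %f    = extractvalue { float, i1 } %pair, 1   ; zext'ed and stored to *flag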
16572 case AMDGPU::BI__builtin_amdgcn_div_fmas:
16573 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
16574 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
16575 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
16576 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
16577 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
16578
16579 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
16580 Src0->getType());
16581 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
16582 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
16583 }
16584
16585 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
16586 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
16587 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
16588 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
16589 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
16590 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
16591 llvm::SmallVector<llvm::Value *, 6> Args;
16592 for (unsigned I = 0; I != E->getNumArgs(); ++I)
16593 Args.push_back(EmitScalarExpr(E->getArg(I)));
16594 assert(Args.size() == 5 || Args.size() == 6);
16595 if (Args.size() == 5)
16596 Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType()));
16597 Function *F =
16598 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
16599 return Builder.CreateCall(F, Args);
16600 }
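// Note on the insertion above (editorial): __builtin_amdgcn_mov_dpp takes five
// arguments while llvm.amdgcn.update.dpp takes six (a leading "old" value), so
// the mov form is canonicalized onto update_dpp by prepending undef, e.g.
//   __builtin_amdgcn_mov_dpp(v, ctrl, row_mask, bank_mask, bound_ctrl)
//   -> call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %v, ...)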
16601 case AMDGPU::BI__builtin_amdgcn_div_fixup:
16602 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
16603 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
16604 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
16605 case AMDGPU::BI__builtin_amdgcn_trig_preop:
16606 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
16607 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
16608 case AMDGPU::BI__builtin_amdgcn_rcp:
16609 case AMDGPU::BI__builtin_amdgcn_rcpf:
16610 case AMDGPU::BI__builtin_amdgcn_rcph:
16611 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
16612 case AMDGPU::BI__builtin_amdgcn_sqrt:
16613 case AMDGPU::BI__builtin_amdgcn_sqrtf:
16614 case AMDGPU::BI__builtin_amdgcn_sqrth:
16615 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt);
16616 case AMDGPU::BI__builtin_amdgcn_rsq:
16617 case AMDGPU::BI__builtin_amdgcn_rsqf:
16618 case AMDGPU::BI__builtin_amdgcn_rsqh:
16619 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
16620 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
16621 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
16622 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
16623 case AMDGPU::BI__builtin_amdgcn_sinf:
16624 case AMDGPU::BI__builtin_amdgcn_sinh:
16625 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
16626 case AMDGPU::BI__builtin_amdgcn_cosf:
16627 case AMDGPU::BI__builtin_amdgcn_cosh:
16628 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
16629 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
16630 return EmitAMDGPUDispatchPtr(*this, E);
16631 case AMDGPU::BI__builtin_amdgcn_log_clampf:
16632 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
16633 case AMDGPU::BI__builtin_amdgcn_ldexp:
16634 case AMDGPU::BI__builtin_amdgcn_ldexpf:
16635 case AMDGPU::BI__builtin_amdgcn_ldexph:
16636 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
16637 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
16638 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
16639 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
16640 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
16641 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
16642 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
16643 Value *Src0 = EmitScalarExpr(E->getArg(0));
16644 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
16645 { Builder.getInt32Ty(), Src0->getType() });
16646 return Builder.CreateCall(F, Src0);
16647 }
16648 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
16649 Value *Src0 = EmitScalarExpr(E->getArg(0));
16650 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
16651 { Builder.getInt16Ty(), Src0->getType() });
16652 return Builder.CreateCall(F, Src0);
16653 }
16654 case AMDGPU::BI__builtin_amdgcn_fract:
16655 case AMDGPU::BI__builtin_amdgcn_fractf:
16656 case AMDGPU::BI__builtin_amdgcn_fracth:
16657 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
16658 case AMDGPU::BI__builtin_amdgcn_lerp:
16659 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
16660 case AMDGPU::BI__builtin_amdgcn_ubfe:
16661 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
16662 case AMDGPU::BI__builtin_amdgcn_sbfe:
16663 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
16664 case AMDGPU::BI__builtin_amdgcn_uicmp:
16665 case AMDGPU::BI__builtin_amdgcn_uicmpl:
16666 case AMDGPU::BI__builtin_amdgcn_sicmp:
16667 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
16668 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
16669 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
16670 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
16671
16672 // FIXME-GFX10: How should a 32-bit mask be handled?
16673 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
16674 { Builder.getInt64Ty(), Src0->getType() });
16675 return Builder.CreateCall(F, { Src0, Src1, Src2 });
16676 }
16677 case AMDGPU::BI__builtin_amdgcn_fcmp:
16678 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
16679 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
16680 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
16681 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
16682
16683 // FIXME-GFX10: How should a 32-bit mask be handled?
16684 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
16685 { Builder.getInt64Ty(), Src0->getType() });
16686 return Builder.CreateCall(F, { Src0, Src1, Src2 });
16687 }
16688 case AMDGPU::BI__builtin_amdgcn_class:
16689 case AMDGPU::BI__builtin_amdgcn_classf:
16690 case AMDGPU::BI__builtin_amdgcn_classh:
16691 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
16692 case AMDGPU::BI__builtin_amdgcn_fmed3f:
16693 case AMDGPU::BI__builtin_amdgcn_fmed3h:
16694 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
16695 case AMDGPU::BI__builtin_amdgcn_ds_append:
16696 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
16697 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
16698 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
16699 Value *Src0 = EmitScalarExpr(E->getArg(0));
16700 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
16701 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
16702 }
16703 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
16704 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
16705 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
16706 Intrinsic::ID Intrin;
16707 switch (BuiltinID) {
16708 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
16709 Intrin = Intrinsic::amdgcn_ds_fadd;
16710 break;
16711 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
16712 Intrin = Intrinsic::amdgcn_ds_fmin;
16713 break;
16714 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
16715 Intrin = Intrinsic::amdgcn_ds_fmax;
16716 break;
16717 }
16718 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
16719 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
16720 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
16721 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
16722 llvm::Value *Src4 = EmitScalarExpr(E->getArg(4));
16723 llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() });
16724 llvm::FunctionType *FTy = F->getFunctionType();
16725 llvm::Type *PTy = FTy->getParamType(0);
16726 Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
16727 return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
16728 }
16729 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
16730 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
16731 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
16732 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
16733 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
16734 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
16735 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
16736 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
16737 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
16738 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
16739 Intrinsic::ID IID;
16740 llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
16741 switch (BuiltinID) {
16742 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
16743 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
16744 IID = Intrinsic::amdgcn_global_atomic_fadd;
16745 break;
16746 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
16747 ArgTy = llvm::FixedVectorType::get(
16748 llvm::Type::getHalfTy(getLLVMContext()), 2);
16749 IID = Intrinsic::amdgcn_global_atomic_fadd;
16750 break;
16751 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
16752 IID = Intrinsic::amdgcn_global_atomic_fadd;
16753 break;
16754 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
16755 IID = Intrinsic::amdgcn_global_atomic_fmin;
16756 break;
16757 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
16758 IID = Intrinsic::amdgcn_global_atomic_fmax;
16759 break;
16760 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
16761 IID = Intrinsic::amdgcn_flat_atomic_fadd;
16762 break;
16763 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
16764 IID = Intrinsic::amdgcn_flat_atomic_fmin;
16765 break;
16766 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
16767 IID = Intrinsic::amdgcn_flat_atomic_fmax;
16768 break;
16769 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
16770 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
16771 IID = Intrinsic::amdgcn_flat_atomic_fadd;
16772 break;
16773 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
16774 ArgTy = llvm::FixedVectorType::get(
16775 llvm::Type::getHalfTy(getLLVMContext()), 2);
16776 IID = Intrinsic::amdgcn_flat_atomic_fadd;
16777 break;
16778 }
16779 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
16780 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
16781 llvm::Function *F =
16782 CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
16783 return Builder.CreateCall(F, {Addr, Val});
16784 }
16785 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
16786 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
16787 Intrinsic::ID IID;
16788 switch (BuiltinID) {
16789 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
16790 IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
16791 break;
16792 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
16793 IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
16794 break;
16795 }
16796 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
16797 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
16798 llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
16799 return Builder.CreateCall(F, {Addr, Val});
16800 }
16801 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
16802 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: {
16803 Intrinsic::ID IID;
16804 llvm::Type *ArgTy;
16805 switch (BuiltinID) {
16806 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
16807 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
16808 IID = Intrinsic::amdgcn_ds_fadd;
16809 break;
16810 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
16811 ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
16812 IID = Intrinsic::amdgcn_ds_fadd;
16813 break;
16814 }
16815 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
16816 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
16817 llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
16818 llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
16819 llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
16820 llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
16821 llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
16822 return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
16823 }
16824 case AMDGPU::BI__builtin_amdgcn_read_exec: {
16825 CallInst *CI = cast<CallInst>(
16826 EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec"));
16827 CI->setConvergent();
16828 return CI;
16829 }
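// Editorial note: both exec-reading cases funnel through a special-register
// read (the full 64-bit exec mask here, its 32-bit halves below) and mark the
// call convergent, since the value of exec depends on control flow.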
16830 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
16831 case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
16832 StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
16833 "exec_lo" : "exec_hi";
16834 CallInst *CI = cast<CallInst>(
16835 EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, NormalRead, RegName));
16836 CI->setConvergent();
16837 return CI;
16838 }
16839 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
16840 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
16841 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
16842 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
16843 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
16844 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
16845 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
16846 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
16847 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
16848 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
16849
16850 // The builtins take these arguments as vec4 where the last element is
16851 // ignored. The intrinsic takes them as vec3.
16852 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
16853 ArrayRef<int>{0, 1, 2});
16854 RayDir =
16855 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
16856 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
16857 ArrayRef<int>{0, 1, 2});
16858
16859 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
16860 {NodePtr->getType(), RayDir->getType()});
16861 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
16862 RayInverseDir, TextureDescr});
16863 }
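// Illustrative detail (editorial): each of the three ray vectors is narrowed
// from the builtin's vec4 to the intrinsic's vec3 with a shuffle, e.g.
//   %dir3 = shufflevector <4 x float> %dir, <4 x float> %dir,
//                         <3 x i32> <i32 0, i32 1, i32 2>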
16864
16865 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
16866 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
16867 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
16868 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
16869 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
16870 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
16871 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
16872 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
16873 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
16874 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
16875 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
16876 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: {
16877
16878 // These operations perform a matrix multiplication and accumulation of
16879 // the form:
16880 // D = A * B + C
16881 // The return type always matches the type of matrix C.
16882 unsigned ArgForMatchingRetType;
16883 unsigned BuiltinWMMAOp;
16884
16885 switch (BuiltinID) {
16886 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
16887 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
16888 ArgForMatchingRetType = 2;
16889 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
16890 break;
16891 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
16892 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
16893 ArgForMatchingRetType = 2;
16894 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
16895 break;
16896 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
16897 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
16898 ArgForMatchingRetType = 2;
16899 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
16900 break;
16901 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
16902 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
16903 ArgForMatchingRetType = 2;
16904 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
16905 break;
16906 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
16907 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
16908 ArgForMatchingRetType = 4;
16909 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
16910 break;
16911 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
16912 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
16913 ArgForMatchingRetType = 4;
16914 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
16915 break;
16916 }
16917
16918 SmallVector<Value *, 6> Args;
16919 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
16920 Args.push_back(EmitScalarExpr(E->getArg(i)));
16921
16922 Function *F = CGM.getIntrinsic(BuiltinWMMAOp,
16923 {Args[ArgForMatchingRetType]->getType()});
16924
16925 return Builder.CreateCall(F, Args);
16926 }
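// Sketch (editorial): for the f16-input, f32-accumulating form, a call such as
//   __builtin_amdgcn_wmma_f32_16x16x16_f16_w32(a, b, c)
// passes matrix C as argument 2, and C's vector type overloads the intrinsic,
// so llvm.amdgcn.wmma.f32.16x16x16.f16 is emitted with D's type equal to C's.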
16927
16928 // amdgcn workitem
16929 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
16930 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
16931 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
16932 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
16933 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
16934 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
16935
16936 // amdgcn workgroup size
16937 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
16938 return EmitAMDGPUWorkGroupSize(*this, 0);
16939 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
16940 return EmitAMDGPUWorkGroupSize(*this, 1);
16941 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
16942 return EmitAMDGPUWorkGroupSize(*this, 2);
16943
16944 // amdgcn grid size
16945 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
16946 return EmitAMDGPUGridSize(*this, 0);
16947 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
16948 return EmitAMDGPUGridSize(*this, 1);
16949 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
16950 return EmitAMDGPUGridSize(*this, 2);
16951
16952 // r600 intrinsics
16953 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
16954 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
16955 return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
16956 case AMDGPU::BI__builtin_r600_read_tidig_x:
16957 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
16958 case AMDGPU::BI__builtin_r600_read_tidig_y:
16959 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
16960 case AMDGPU::BI__builtin_r600_read_tidig_z:
16961 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
16962 case AMDGPU::BI__builtin_amdgcn_alignbit: {
16963 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
16964 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
16965 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
16966 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
16967 return Builder.CreateCall(F, { Src0, Src1, Src2 });
16968 }
16969
16970 case AMDGPU::BI__builtin_amdgcn_fence: {
16971 if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
16972 EmitScalarExpr(E->getArg(1)), AO, SSID))
16973 return Builder.CreateFence(AO, SSID);
16974 LLVM_FALLTHROUGH;
16975 }
16976 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
16977 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
16978 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
16979 case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
16980 unsigned BuiltinAtomicOp;
16981 llvm::Type *ResultType = ConvertType(E->getType());
16982
16983 switch (BuiltinID) {
16984 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
16985 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
16986 BuiltinAtomicOp = Intrinsic::amdgcn_atomic_inc;
16987 break;
16988 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
16989 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
16990 BuiltinAtomicOp = Intrinsic::amdgcn_atomic_dec;
16991 break;
16992 }
16993
16994 Value *Ptr = EmitScalarExpr(E->getArg(0));
16995 Value *Val = EmitScalarExpr(E->getArg(1));
16996
16997 llvm::Function *F =
16998 CGM.getIntrinsic(BuiltinAtomicOp, {ResultType, Ptr->getType()});
16999
17000 if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
17001 EmitScalarExpr(E->getArg(3)), AO, SSID)) {
17002
17003 // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expect the ordering
17004 // and scope as unsigned values.
17005 Value *MemOrder = Builder.getInt32(static_cast<int>(AO));
17006 Value *MemScope = Builder.getInt32(static_cast<int>(SSID));
17007
17008 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
17009 bool Volatile =
17010 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
17011 Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile));
17012
17013 return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile});
17014 }
17015 LLVM_FALLTHROUGH;
17016 }
17017 default:
17018 return nullptr;
17019 }
17020}
17021
17022/// Handle a SystemZ function in which the final argument is a pointer
17023/// to an int that receives the post-instruction CC value. At the LLVM level
17024/// this is represented as a function that returns a {result, cc} pair.
17025static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
17026 unsigned IntrinsicID,
17027 const CallExpr *E) {
17028 unsigned NumArgs = E->getNumArgs() - 1;
17029 SmallVector<Value *, 8> Args(NumArgs);
17030 for (unsigned I = 0; I < NumArgs; ++I)
17031 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
17032 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
17033 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
17034 Value *Call = CGF.Builder.CreateCall(F, Args);
17035 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
17036 CGF.Builder.CreateStore(CC, CCPtr);
17037 return CGF.Builder.CreateExtractValue(Call, 0);
17038}
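// Hypothetical usage (editorial): for a CC-producing builtin such as
//   int cc;
//   vr = __builtin_s390_vceqbs(va, vb, &cc);
// the helper calls the {result, cc}-returning intrinsic, stores element 1
// through the trailing pointer argument, and returns element 0 as the
// builtin's value.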
17039
17040Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
17041 const CallExpr *E) {
17042 switch (BuiltinID) {
17043 case SystemZ::BI__builtin_tbegin: {
17044 Value *TDB = EmitScalarExpr(E->getArg(0));
17045 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
17046 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
17047 return Builder.CreateCall(F, {TDB, Control});
17048 }
17049 case SystemZ::BI__builtin_tbegin_nofloat: {
17050 Value *TDB = EmitScalarExpr(E->getArg(0));
17051 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
17052 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
17053 return Builder.CreateCall(F, {TDB, Control});
17054 }
17055 case SystemZ::BI__builtin_tbeginc: {
17056 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
17057 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
17058 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
17059 return Builder.CreateCall(F, {TDB, Control});
17060 }
17061 case SystemZ::BI__builtin_tabort: {
17062 Value *Data = EmitScalarExpr(E->getArg(0));
17063 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
17064 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
17065 }
17066 case SystemZ::BI__builtin_non_tx_store: {
17067 Value *Address = EmitScalarExpr(E->getArg(0));
17068 Value *Data = EmitScalarExpr(E->getArg(1));
17069 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
17070 return Builder.CreateCall(F, {Data, Address});
17071 }
17072
17073 // Vector builtins. Note that most vector builtins are mapped automatically
17074 // to target-specific LLVM intrinsics. The ones handled specially here can
17075 // be represented via standard LLVM IR, which is preferable since it enables
17076 // common LLVM optimizations.
17077
17078 case SystemZ::BI__builtin_s390_vpopctb:
17079 case SystemZ::BI__builtin_s390_vpopcth:
17080 case SystemZ::BI__builtin_s390_vpopctf:
17081 case SystemZ::BI__builtin_s390_vpopctg: {
17082 llvm::Type *ResultType = ConvertType(E->getType());
17083 Value *X = EmitScalarExpr(E->getArg(0));
17084 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
17085 return Builder.CreateCall(F, X);
17086 }
17087
17088 case SystemZ::BI__builtin_s390_vclzb:
17089 case SystemZ::BI__builtin_s390_vclzh:
17090 case SystemZ::BI__builtin_s390_vclzf:
17091 case SystemZ::BI__builtin_s390_vclzg: {
17092 llvm::Type *ResultType = ConvertType(E->getType());
17093 Value *X = EmitScalarExpr(E->getArg(0));
17094 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17095 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
17096 return Builder.CreateCall(F, {X, Undef});
17097 }
17098
17099 case SystemZ::BI__builtin_s390_vctzb:
17100 case SystemZ::BI__builtin_s390_vctzh:
17101 case SystemZ::BI__builtin_s390_vctzf:
17102 case SystemZ::BI__builtin_s390_vctzg: {
17103 llvm::Type *ResultType = ConvertType(E->getType());
17104 Value *X = EmitScalarExpr(E->getArg(0));
17105 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17106 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
17107 return Builder.CreateCall(F, {X, Undef});
17108 }
17109
17110 case SystemZ::BI__builtin_s390_vfsqsb:
17111 case SystemZ::BI__builtin_s390_vfsqdb: {
17112 llvm::Type *ResultType = ConvertType(E->getType());
17113 Value *X = EmitScalarExpr(E->getArg(0));
17114 if (Builder.getIsFPConstrained()) {
17115 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
17116 return Builder.CreateConstrainedFPCall(F, { X });
17117 } else {
17118 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17119 return Builder.CreateCall(F, X);
17120 }
17121 }
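// Pattern note (editorial): this case and the FP multiply-add cases below
// share one shape: in strict-FP mode (Builder.getIsFPConstrained()) the
// llvm.experimental.constrained.* intrinsic is emitted via
// CreateConstrainedFPCall, which appends the rounding-mode and
// exception-behavior operands; otherwise the plain intrinsic is called.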
17122 case SystemZ::BI__builtin_s390_vfmasb:
17123 case SystemZ::BI__builtin_s390_vfmadb: {
17124 llvm::Type *ResultType = ConvertType(E->getType());
17125 Value *X = EmitScalarExpr(E->getArg(0));
17126 Value *Y = EmitScalarExpr(E->getArg(1));
17127 Value *Z = EmitScalarExpr(E->getArg(2));
17128 if (Builder.getIsFPConstrained()) {
17129 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17130 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
17131 } else {
17132 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17133 return Builder.CreateCall(F, {X, Y, Z});
17134 }
17135 }
17136 case SystemZ::BI__builtin_s390_vfmssb:
17137 case SystemZ::BI__builtin_s390_vfmsdb: {
17138 llvm::Type *ResultType = ConvertType(E->getType());
17139 Value *X = EmitScalarExpr(E->getArg(0));
17140 Value *Y = EmitScalarExpr(E->getArg(1));
17141 Value *Z = EmitScalarExpr(E->getArg(2));
17142 if (Builder.getIsFPConstrained()) {
17143 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17144 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17145 } else {
17146 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17147 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17148 }
17149 }
17150 case SystemZ::BI__builtin_s390_vfnmasb:
17151 case SystemZ::BI__builtin_s390_vfnmadb: {
17152 llvm::Type *ResultType = ConvertType(E->getType());
17153 Value *X = EmitScalarExpr(E->getArg(0));
17154 Value *Y = EmitScalarExpr(E->getArg(1));
17155 Value *Z = EmitScalarExpr(E->getArg(2));
17156 if (Builder.getIsFPConstrained()) {
17157 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17158 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
17159 } else {
17160 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17161 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
17162 }
17163 }
17164 case SystemZ::BI__builtin_s390_vfnmssb:
17165 case SystemZ::BI__builtin_s390_vfnmsdb: {
17166 llvm::Type *ResultType = ConvertType(E->getType());
17167 Value *X = EmitScalarExpr(E->getArg(0));
17168 Value *Y = EmitScalarExpr(E->getArg(1));
17169 Value *Z = EmitScalarExpr(E->getArg(2));
17170 if (Builder.getIsFPConstrained()) {
17171 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17172 Value *NegZ = Builder.CreateFNeg(Z, "sub");
17173 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
17174 } else {
17175 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17176 Value *NegZ = Builder.CreateFNeg(Z, "neg");
17177 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
17178 }
17179 }
17180 case SystemZ::BI__builtin_s390_vflpsb:
17181 case SystemZ::BI__builtin_s390_vflpdb: {
17182 llvm::Type *ResultType = ConvertType(E->getType());
17183 Value *X = EmitScalarExpr(E->getArg(0));
17184 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
17185 return Builder.CreateCall(F, X);
17186 }
17187 case SystemZ::BI__builtin_s390_vflnsb:
17188 case SystemZ::BI__builtin_s390_vflndb: {
17189 llvm::Type *ResultType = ConvertType(E->getType());
17190 Value *X = EmitScalarExpr(E->getArg(0));
17191 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
17192 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
17193 }
17194 case SystemZ::BI__builtin_s390_vfisb:
17195 case SystemZ::BI__builtin_s390_vfidb: {
17196 llvm::Type *ResultType = ConvertType(E->getType());
17197 Value *X = EmitScalarExpr(E->getArg(0));
17198 // Constant-fold the M4 and M5 mask arguments.
17199 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
17200 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
17201 // Check whether this instance can be represented via an LLVM standard
17202 // intrinsic. We only support some combinations of M4 and M5.
17203 Intrinsic::ID ID = Intrinsic::not_intrinsic;
17204 Intrinsic::ID CI;
17205 switch (M4.getZExtValue()) {
17206 default: break;
17207 case 0: // IEEE-inexact exception allowed
17208 switch (M5.getZExtValue()) {
17209 default: break;
17210 case 0: ID = Intrinsic::rint;
17211 CI = Intrinsic::experimental_constrained_rint; break;
17212 }
17213 break;
17214 case 4: // IEEE-inexact exception suppressed
17215 switch (M5.getZExtValue()) {
17216 default: break;
17217 case 0: ID = Intrinsic::nearbyint;
17218 CI = Intrinsic::experimental_constrained_nearbyint; break;
17219 case 1: ID = Intrinsic::round;
17220 CI = Intrinsic::experimental_constrained_round; break;
17221 case 5: ID = Intrinsic::trunc;
17222 CI = Intrinsic::experimental_constrained_trunc; break;
17223 case 6: ID = Intrinsic::ceil;
17224 CI = Intrinsic::experimental_constrained_ceil; break;
17225 case 7: ID = Intrinsic::floor;
17226 CI = Intrinsic::experimental_constrained_floor; break;
17227 }
17228 break;
17229 }
17230 if (ID != Intrinsic::not_intrinsic) {
17231 if (Builder.getIsFPConstrained()) {
17232 Function *F = CGM.getIntrinsic(CI, ResultType);
17233 return Builder.CreateConstrainedFPCall(F, X);
17234 } else {
17235 Function *F = CGM.getIntrinsic(ID, ResultType);
17236 return Builder.CreateCall(F, X);
17237 }
17238 }
17239 switch (BuiltinID) { // FIXME: constrained version?
17240 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
17241 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
17242 default: llvm_unreachable("Unknown BuiltinID");
17243 }
17244 Function *F = CGM.getIntrinsic(ID);
17245 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
17246 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
17247 return Builder.CreateCall(F, {X, M4Value, M5Value});
17248 }
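// Worked example of the M4/M5 mapping above (editorial): M4 = 4, M5 = 7 means
// "IEEE-inexact suppressed, round toward -infinity", so
//   __builtin_s390_vfidb(v, 4, 7)
// lowers to llvm.floor (or llvm.experimental.constrained.floor in strict-FP
// mode); unsupported mask combinations fall back to the target-specific
// s390.vfidb intrinsic with the masks passed through.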
17249 case SystemZ::BI__builtin_s390_vfmaxsb:
17250 case SystemZ::BI__builtin_s390_vfmaxdb: {
17251 llvm::Type *ResultType = ConvertType(E->getType());
17252 Value *X = EmitScalarExpr(E->getArg(0));
17253 Value *Y = EmitScalarExpr(E->getArg(1));
17254 // Constant-fold the M4 mask argument.
17255 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
17256 // Check whether this instance can be represented via an LLVM standard
17257 // intrinsic. We only support some values of M4.
17258 Intrinsic::ID ID = Intrinsic::not_intrinsic;
17259 Intrinsic::ID CI;
17260 switch (M4.getZExtValue()) {
17261 default: break;
17262 case 4: ID = Intrinsic::maxnum;
17263 CI = Intrinsic::experimental_constrained_maxnum; break;
17264 }
17265 if (ID != Intrinsic::not_intrinsic) {
17266 if (Builder.getIsFPConstrained()) {
17267 Function *F = CGM.getIntrinsic(CI, ResultType);
17268 return Builder.CreateConstrainedFPCall(F, {X, Y});
17269 } else {
17270 Function *F = CGM.getIntrinsic(ID, ResultType);
17271 return Builder.CreateCall(F, {X, Y});
17272 }
17273 }
17274 switch (BuiltinID) {
17275 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
17276 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
17277 default: llvm_unreachable("Unknown BuiltinID");
17278 }
17279 Function *F = CGM.getIntrinsic(ID);
17280 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
17281 return Builder.CreateCall(F, {X, Y, M4Value});
17282 }
17283 case SystemZ::BI__builtin_s390_vfminsb:
17284 case SystemZ::BI__builtin_s390_vfmindb: {
17285 llvm::Type *ResultType = ConvertType(E->getType());
17286 Value *X = EmitScalarExpr(E->getArg(0));
17287 Value *Y = EmitScalarExpr(E->getArg(1));
17288 // Constant-fold the M4 mask argument.
17289 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
17290 // Check whether this instance can be represented via an LLVM standard
17291 // intrinsic. We only support some values of M4.
17292 Intrinsic::ID ID = Intrinsic::not_intrinsic;
17293 Intrinsic::ID CI;
17294 switch (M4.getZExtValue()) {
17295 default: break;
17296 case 4: ID = Intrinsic::minnum;
17297 CI = Intrinsic::experimental_constrained_minnum; break;
17298 }
17299 if (ID != Intrinsic::not_intrinsic) {
17300 if (Builder.getIsFPConstrained()) {
17301 Function *F = CGM.getIntrinsic(CI, ResultType);
17302 return Builder.CreateConstrainedFPCall(F, {X, Y});
17303 } else {
17304 Function *F = CGM.getIntrinsic(ID, ResultType);
17305 return Builder.CreateCall(F, {X, Y});
17306 }
17307 }
17308 switch (BuiltinID) {
17309 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
17310 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
17311 default: llvm_unreachable("Unknown BuiltinID");
17312 }
17313 Function *F = CGM.getIntrinsic(ID);
17314 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
17315 return Builder.CreateCall(F, {X, Y, M4Value});
17316 }
17317
17318 case SystemZ::BI__builtin_s390_vlbrh:
17319 case SystemZ::BI__builtin_s390_vlbrf:
17320 case SystemZ::BI__builtin_s390_vlbrg: {
17321 llvm::Type *ResultType = ConvertType(E->getType());
17322 Value *X = EmitScalarExpr(E->getArg(0));
17323 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
17324 return Builder.CreateCall(F, X);
17325 }
17326
17327 // Vector intrinsics that output the post-instruction CC value.
17328
17329#define INTRINSIC_WITH_CC(NAME) \
17330 case SystemZ::BI__builtin_##NAME: \
17331 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
17332
17333 INTRINSIC_WITH_CC(s390_vpkshs);
17334 INTRINSIC_WITH_CC(s390_vpksfs);
17335 INTRINSIC_WITH_CC(s390_vpksgs);
17336
17337 INTRINSIC_WITH_CC(s390_vpklshs);
17338 INTRINSIC_WITH_CC(s390_vpklsfs);
17339 INTRINSIC_WITH_CC(s390_vpklsgs);
17340
17341 INTRINSIC_WITH_CC(s390_vceqbs);
17342 INTRINSIC_WITH_CC(s390_vceqhs);
17343 INTRINSIC_WITH_CC(s390_vceqfs);
17344 INTRINSIC_WITH_CC(s390_vceqgs);
17345
17346 INTRINSIC_WITH_CC(s390_vchbs);
17347 INTRINSIC_WITH_CC(s390_vchhs);
17348 INTRINSIC_WITH_CC(s390_vchfs);
17349 INTRINSIC_WITH_CC(s390_vchgs);
17350
17351 INTRINSIC_WITH_CC(s390_vchlbs);
17352 INTRINSIC_WITH_CC(s390_vchlhs);
17353 INTRINSIC_WITH_CC(s390_vchlfs);
17354 INTRINSIC_WITH_CC(s390_vchlgs);
17355
17356 INTRINSIC_WITH_CC(s390_vfaebs);
17357 INTRINSIC_WITH_CC(s390_vfaehs);
17358 INTRINSIC_WITH_CC(s390_vfaefs);
17359
17360 INTRINSIC_WITH_CC(s390_vfaezbs);
17361 INTRINSIC_WITH_CC(s390_vfaezhs);
17362 INTRINSIC_WITH_CC(s390_vfaezfs);
17363
17364 INTRINSIC_WITH_CC(s390_vfeebs);
17365 INTRINSIC_WITH_CC(s390_vfeehs);
17366 INTRINSIC_WITH_CC(s390_vfeefs);
17367
17368 INTRINSIC_WITH_CC(s390_vfeezbs);
17369 INTRINSIC_WITH_CC(s390_vfeezhs);
17370 INTRINSIC_WITH_CC(s390_vfeezfs);
17371
17372 INTRINSIC_WITH_CC(s390_vfenebs);
17373 INTRINSIC_WITH_CC(s390_vfenehs);
17374 INTRINSIC_WITH_CC(s390_vfenefs);
17375
17376 INTRINSIC_WITH_CC(s390_vfenezbs);
17377 INTRINSIC_WITH_CC(s390_vfenezhs);
17378 INTRINSIC_WITH_CC(s390_vfenezfs);
17379
17380 INTRINSIC_WITH_CC(s390_vistrbs);
17381 INTRINSIC_WITH_CC(s390_vistrhs);
17382 INTRINSIC_WITH_CC(s390_vistrfs);
17383
17384 INTRINSIC_WITH_CC(s390_vstrcbs);
17385 INTRINSIC_WITH_CC(s390_vstrchs);
17386 INTRINSIC_WITH_CC(s390_vstrcfs);
17387
17388 INTRINSIC_WITH_CC(s390_vstrczbs);
17389 INTRINSIC_WITH_CC(s390_vstrczhs);
17390 INTRINSIC_WITH_CC(s390_vstrczfs);
17391
17392 INTRINSIC_WITH_CC(s390_vfcesbs);
17393 INTRINSIC_WITH_CC(s390_vfcedbs);
17394 INTRINSIC_WITH_CC(s390_vfchsbs);
17395 INTRINSIC_WITH_CC(s390_vfchdbs);
17396 INTRINSIC_WITH_CC(s390_vfchesbs);
17397 INTRINSIC_WITH_CC(s390_vfchedbs);
17398
17399 INTRINSIC_WITH_CC(s390_vftcisb);
17400 INTRINSIC_WITH_CC(s390_vftcidb);
17401
17402 INTRINSIC_WITH_CC(s390_vstrsb);
17403 INTRINSIC_WITH_CC(s390_vstrsh);
17404 INTRINSIC_WITH_CC(s390_vstrsf);
17405
17406 INTRINSIC_WITH_CC(s390_vstrszb);
17407 INTRINSIC_WITH_CC(s390_vstrszh);
17408 INTRINSIC_WITH_CC(s390_vstrszf);
17409
17410#undef INTRINSIC_WITH_CC
17411
17412 default:
17413 return nullptr;
17414 }
17415}
17416
17417namespace {
17418// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant.
17419struct NVPTXMmaLdstInfo {
17420 unsigned NumResults; // Number of elements to load/store
17421 // Intrinsic IDs for row/col variants. 0 if the particular layout is unsupported.
17422 unsigned IID_col;
17423 unsigned IID_row;
17424};
17425
17426#define MMA_INTR(geom_op_type, layout) \
17427 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
17428#define MMA_LDST(n, geom_op_type) \
17429 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
17430
17431static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
17432 switch (BuiltinID) {
17433 // FP MMA loads
17434 case NVPTX::BI__hmma_m16n16k16_ld_a:
17435 return MMA_LDST(8, m16n16k16_load_a_f16);
17436 case NVPTX::BI__hmma_m16n16k16_ld_b:
17437 return MMA_LDST(8, m16n16k16_load_b_f16);
17438 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
17439 return MMA_LDST(4, m16n16k16_load_c_f16);
17440 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
17441 return MMA_LDST(8, m16n16k16_load_c_f32);
17442 case NVPTX::BI__hmma_m32n8k16_ld_a:
17443 return MMA_LDST(8, m32n8k16_load_a_f16);
17444 case NVPTX::BI__hmma_m32n8k16_ld_b:
17445 return MMA_LDST(8, m32n8k16_load_b_f16);
17446 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
17447 return MMA_LDST(4, m32n8k16_load_c_f16);
17448 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
17449 return MMA_LDST(8, m32n8k16_load_c_f32);
17450 case NVPTX::BI__hmma_m8n32k16_ld_a:
17451 return MMA_LDST(8, m8n32k16_load_a_f16);
17452 case NVPTX::BI__hmma_m8n32k16_ld_b:
17453 return MMA_LDST(8, m8n32k16_load_b_f16);
17454 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
17455 return MMA_LDST(4, m8n32k16_load_c_f16);
17456 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
17457 return MMA_LDST(8, m8n32k16_load_c_f32);
17458
17459 // Integer MMA loads
17460 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
17461 return MMA_LDST(2, m16n16k16_load_a_s8);
17462 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
17463 return MMA_LDST(2, m16n16k16_load_a_u8);
17464 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
17465 return MMA_LDST(2, m16n16k16_load_b_s8);
17466 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
17467 return MMA_LDST(2, m16n16k16_load_b_u8);
17468 case NVPTX::BI__imma_m16n16k16_ld_c:
17469 return MMA_LDST(8, m16n16k16_load_c_s32);
17470 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
17471 return MMA_LDST(4, m32n8k16_load_a_s8);
17472 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
17473 return MMA_LDST(4, m32n8k16_load_a_u8);
17474 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
17475 return MMA_LDST(1, m32n8k16_load_b_s8);
17476 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
17477 return MMA_LDST(1, m32n8k16_load_b_u8);
17478 case NVPTX::BI__imma_m32n8k16_ld_c:
17479 return MMA_LDST(8, m32n8k16_load_c_s32);
17480 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
17481 return MMA_LDST(1, m8n32k16_load_a_s8);
17482 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
17483 return MMA_LDST(1, m8n32k16_load_a_u8);
17484 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
17485 return MMA_LDST(4, m8n32k16_load_b_s8);
17486 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
17487 return MMA_LDST(4, m8n32k16_load_b_u8);
17488 case NVPTX::BI__imma_m8n32k16_ld_c:
17489 return MMA_LDST(8, m8n32k16_load_c_s32);
17490
17491 // Sub-integer MMA loads.
17492 // Only row/col layout is supported by A/B fragments.
17493 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
17494 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
17495 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
17496 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
17497 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
17498 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
17499 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
17500 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
17501 case NVPTX::BI__imma_m8n8k32_ld_c:
17502 return MMA_LDST(2, m8n8k32_load_c_s32);
17503 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
17504 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
17505 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
17506 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
17507 case NVPTX::BI__bmma_m8n8k128_ld_c:
17508 return MMA_LDST(2, m8n8k128_load_c_s32);
17509
17510 // Double MMA loads
17511 case NVPTX::BI__dmma_m8n8k4_ld_a:
17512 return MMA_LDST(1, m8n8k4_load_a_f64);
17513 case NVPTX::BI__dmma_m8n8k4_ld_b:
17514 return MMA_LDST(1, m8n8k4_load_b_f64);
17515 case NVPTX::BI__dmma_m8n8k4_ld_c:
17516 return MMA_LDST(2, m8n8k4_load_c_f64);
17517
17518 // Alternate float MMA loads
17519 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
17520 return MMA_LDST(4, m16n16k16_load_a_bf16);
17521 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
17522 return MMA_LDST(4, m16n16k16_load_b_bf16);
17523 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
17524 return MMA_LDST(2, m8n32k16_load_a_bf16);
17525 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
17526 return MMA_LDST(8, m8n32k16_load_b_bf16);
17527 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
17528 return MMA_LDST(8, m32n8k16_load_a_bf16);
17529 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
17530 return MMA_LDST(2, m32n8k16_load_b_bf16);
17531 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
17532 return MMA_LDST(4, m16n16k8_load_a_tf32);
17533 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
17534 return MMA_LDST(4, m16n16k8_load_b_tf32);
17535 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
17536 return MMA_LDST(8, m16n16k8_load_c_f32);
17537
17538 // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
17539 // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
17540 // use fragment C for both loads and stores.
17541 // FP MMA stores.
17542 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
17543 return MMA_LDST(4, m16n16k16_store_d_f16);
17544 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
17545 return MMA_LDST(8, m16n16k16_store_d_f32);
17546 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
17547 return MMA_LDST(4, m32n8k16_store_d_f16);
17548 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
17549 return MMA_LDST(8, m32n8k16_store_d_f32);
17550 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
17551 return MMA_LDST(4, m8n32k16_store_d_f16);
17552 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
17553 return MMA_LDST(8, m8n32k16_store_d_f32);
17554
17555 // Integer and sub-integer MMA stores.
17556 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
17557 // name, integer loads/stores use LLVM's i32.
17558 case NVPTX::BI__imma_m16n16k16_st_c_i32:
17559 return MMA_LDST(8, m16n16k16_store_d_s32);
17560 case NVPTX::BI__imma_m32n8k16_st_c_i32:
17561 return MMA_LDST(8, m32n8k16_store_d_s32);
17562 case NVPTX::BI__imma_m8n32k16_st_c_i32:
17563 return MMA_LDST(8, m8n32k16_store_d_s32);
17564 case NVPTX::BI__imma_m8n8k32_st_c_i32:
17565 return MMA_LDST(2, m8n8k32_store_d_s32);
17566 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
17567 return MMA_LDST(2, m8n8k128_store_d_s32);
17568
17569 // Double MMA store
17570 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
17571 return MMA_LDST(2, m8n8k4_store_d_f64);
17572
17573 // Alternate float MMA store
17574 case NVPTX::BI__mma_m16n16k8_st_c_f32:
17575 return MMA_LDST(8, m16n16k8_store_d_f32);
17576
17577 default:
17578 llvm_unreachable("Unknown MMA builtin");
17579 }
17580}
17581#undef MMA_LDST
17582#undef MMA_INTR
17583
17584
17585struct NVPTXMmaInfo {
17586 unsigned NumEltsA;
17587 unsigned NumEltsB;
17588 unsigned NumEltsC;
17589 unsigned NumEltsD;
17590
17591 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
17592 // over 'col' for layout. The index of non-satf variants is expected to match
17593 // the undocumented layout constants used by CUDA's mma.hpp.
17594 std::array<unsigned, 8> Variants;
17595
17596 unsigned getMMAIntrinsic(int Layout, bool Satf) {
17597 unsigned Index = Layout + 4 * Satf;
17598 if (Index >= Variants.size())
17599 return 0;
17600 return Variants[Index];
17601 }
17602};
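// Index arithmetic (editorial): with variants ordered row_row, row_col,
// col_row, col_col and their satf forms following in the same order,
//   getMMAIntrinsic(/*Layout=*/1, /*Satf=*/true)
// computes Index = 1 + 4 * 1 = 5 and selects the row_col _satfinite variant.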
17603
17604 // Returns the NVPTXMmaInfo whose getMMAIntrinsic yields an intrinsic that
17605 // matches Layout and Satf for valid combinations, or 0 otherwise.
17606static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
17607 // clang-format off
17608#define MMA_VARIANTS(geom, type) \
17609 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
17610 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
17611 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
17612 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
17613#define MMA_SATF_VARIANTS(geom, type) \
17614 MMA_VARIANTS(geom, type), \
17615 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
17616 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
17617 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
17618 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
17619// Sub-integer MMA only supports row.col layout.
17620#define MMA_VARIANTS_I4(geom, type) \
17621 0, \
17622 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
17623 0, \
17624 0, \
17625 0, \
17626 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
17627 0, \
17628 0
17629// b1 MMA does not support .satfinite.
17630#define MMA_VARIANTS_B1_XOR(geom, type) \
17631 0, \
17632 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
17633 0, \
17634 0, \
17635 0, \
17636 0, \
17637 0, \
17638 0
17639#define MMA_VARIANTS_B1_AND(geom, type) \
17640 0, \
17641 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
17642 0, \
17643 0, \
17644 0, \
17645 0, \
17646 0, \
17647 0
17648 // clang-format on
17649 switch (BuiltinID) {
17650 // FP MMA
17651 // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation,
17652 // while the NumElts fields of the return value are ordered as A, B, C, D.
17653 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
17654 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
17655 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
17656 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
17657 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
17658 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
17659 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
17660 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
17661 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
17662 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
17663 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
17664 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
17665 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
17666 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
17667 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
17668 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
17669 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
17670 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
17671 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
17672 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
17673 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
17674 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
17675 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
17676 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
17677
17678 // Integer MMA
17679 case NVPTX::BI__imma_m16n16k16_mma_s8:
17680 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
17681 case NVPTX::BI__imma_m16n16k16_mma_u8:
17682 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
17683 case NVPTX::BI__imma_m32n8k16_mma_s8:
17684 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
17685 case NVPTX::BI__imma_m32n8k16_mma_u8:
17686 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
17687 case NVPTX::BI__imma_m8n32k16_mma_s8:
17688 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
17689 case NVPTX::BI__imma_m8n32k16_mma_u8:
17690 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
17691
17692 // Sub-integer MMA
17693 case NVPTX::BI__imma_m8n8k32_mma_s4:
17694 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
17695 case NVPTX::BI__imma_m8n8k32_mma_u4:
17696 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
17697 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
17698 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
17699 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
17700 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
17701
17702 // Double MMA
17703 case NVPTX::BI__dmma_m8n8k4_mma_f64:
17704 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
17705
17706 // Alternate FP MMA
17707 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
17708 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
17709 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
17710 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
17711 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
17712 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
17713 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
17714 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
17715 default:
17716 llvm_unreachable("Unexpected builtin ID.");
17717 }
17718#undef MMA_VARIANTS
17719#undef MMA_SATF_VARIANTS
17720#undef MMA_VARIANTS_I4
17721#undef MMA_VARIANTS_B1_AND
17722#undef MMA_VARIANTS_B1_XOR
17723}
17724
17725} // namespace
17726
17727Value *
17728CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
17729 auto MakeLdg = [&](unsigned IntrinsicID) {
17730 Value *Ptr = EmitScalarExpr(E->getArg(0));
17731 QualType ArgType = E->getArg(0)->getType();
17732 clang::CharUnits Align = CGM.getNaturalPointeeTypeAlignment(ArgType);
17733 llvm::Type *ElemTy = ConvertTypeForMem(ArgType->getPointeeType());
17734 return Builder.CreateCall(
17735 CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
17736 {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
17737 };
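// Sketch of MakeLdg's output (editorial): for __nvvm_ldg_f4 on a float4
// pointer the natural pointee alignment is 16 (n * alignof(t), per the PTX
// interoperability rule quoted further below), giving roughly
//   call <4 x float> @llvm.nvvm.ldg.global.f.v4f32.p0(ptr %p, i32 16)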
17738 auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
17739 Value *Ptr = EmitScalarExpr(E->getArg(0));
17740 llvm::Type *ElemTy =
17741 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
17742 return Builder.CreateCall(
17743 CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
17744 {Ptr, EmitScalarExpr(E->getArg(1))});
17745 };
17746 switch (BuiltinID) {
17747 case NVPTX::BI__nvvm_atom_add_gen_i:
17748 case NVPTX::BI__nvvm_atom_add_gen_l:
17749 case NVPTX::BI__nvvm_atom_add_gen_ll:
17750 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
17751
17752 case NVPTX::BI__nvvm_atom_sub_gen_i:
17753 case NVPTX::BI__nvvm_atom_sub_gen_l:
17754 case NVPTX::BI__nvvm_atom_sub_gen_ll:
17755 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
17756
17757 case NVPTX::BI__nvvm_atom_and_gen_i:
17758 case NVPTX::BI__nvvm_atom_and_gen_l:
17759 case NVPTX::BI__nvvm_atom_and_gen_ll:
17760 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
17761
17762 case NVPTX::BI__nvvm_atom_or_gen_i:
17763 case NVPTX::BI__nvvm_atom_or_gen_l:
17764 case NVPTX::BI__nvvm_atom_or_gen_ll:
17765 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
17766
17767 case NVPTX::BI__nvvm_atom_xor_gen_i:
17768 case NVPTX::BI__nvvm_atom_xor_gen_l:
17769 case NVPTX::BI__nvvm_atom_xor_gen_ll:
17770 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
17771
17772 case NVPTX::BI__nvvm_atom_xchg_gen_i:
17773 case NVPTX::BI__nvvm_atom_xchg_gen_l:
17774 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
17775 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
17776
17777 case NVPTX::BI__nvvm_atom_max_gen_i:
17778 case NVPTX::BI__nvvm_atom_max_gen_l:
17779 case NVPTX::BI__nvvm_atom_max_gen_ll:
17780 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
17781
17782 case NVPTX::BI__nvvm_atom_max_gen_ui:
17783 case NVPTX::BI__nvvm_atom_max_gen_ul:
17784 case NVPTX::BI__nvvm_atom_max_gen_ull:
17785 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
17786
17787 case NVPTX::BI__nvvm_atom_min_gen_i:
17788 case NVPTX::BI__nvvm_atom_min_gen_l:
17789 case NVPTX::BI__nvvm_atom_min_gen_ll:
17790 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
17791
17792 case NVPTX::BI__nvvm_atom_min_gen_ui:
17793 case NVPTX::BI__nvvm_atom_min_gen_ul:
17794 case NVPTX::BI__nvvm_atom_min_gen_ull:
17795 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
17796
17797 case NVPTX::BI__nvvm_atom_cas_gen_i:
17798 case NVPTX::BI__nvvm_atom_cas_gen_l:
17799 case NVPTX::BI__nvvm_atom_cas_gen_ll:
17800 // __nvvm_atom_cas_gen_* should return the old value rather than the
17801 // success flag.
17802 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
17803
17804 case NVPTX::BI__nvvm_atom_add_gen_f:
17805 case NVPTX::BI__nvvm_atom_add_gen_d: {
17806 Value *Ptr = EmitScalarExpr(E->getArg(0));
17807 Value *Val = EmitScalarExpr(E->getArg(1));
17808 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val,
17809 AtomicOrdering::SequentiallyConsistent);
17810 }
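// Editorial note: the generic float/double atomic adds lower to a plain
// atomicrmw rather than an NVVM intrinsic, e.g.
//   %old = atomicrmw fadd ptr %p, float %v seq_cst
// which keeps them visible to LLVM's standard atomics optimizations.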
17811
17812 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
17813 Value *Ptr = EmitScalarExpr(E->getArg(0));
17814 Value *Val = EmitScalarExpr(E->getArg(1));
17815 Function *FnALI32 =
17816 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
17817 return Builder.CreateCall(FnALI32, {Ptr, Val});
17818 }
17819
17820 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
17821 Value *Ptr = EmitScalarExpr(E->getArg(0));
17822 Value *Val = EmitScalarExpr(E->getArg(1));
17823 Function *FnALD32 =
17824 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
17825 return Builder.CreateCall(FnALD32, {Ptr, Val});
17826 }
17827
17828 case NVPTX::BI__nvvm_ldg_c:
17829 case NVPTX::BI__nvvm_ldg_c2:
17830 case NVPTX::BI__nvvm_ldg_c4:
17831 case NVPTX::BI__nvvm_ldg_s:
17832 case NVPTX::BI__nvvm_ldg_s2:
17833 case NVPTX::BI__nvvm_ldg_s4:
17834 case NVPTX::BI__nvvm_ldg_i:
17835 case NVPTX::BI__nvvm_ldg_i2:
17836 case NVPTX::BI__nvvm_ldg_i4:
17837 case NVPTX::BI__nvvm_ldg_l:
17838 case NVPTX::BI__nvvm_ldg_ll:
17839 case NVPTX::BI__nvvm_ldg_ll2:
17840 case NVPTX::BI__nvvm_ldg_uc:
17841 case NVPTX::BI__nvvm_ldg_uc2:
17842 case NVPTX::BI__nvvm_ldg_uc4:
17843 case NVPTX::BI__nvvm_ldg_us:
17844 case NVPTX::BI__nvvm_ldg_us2:
17845 case NVPTX::BI__nvvm_ldg_us4:
17846 case NVPTX::BI__nvvm_ldg_ui:
17847 case NVPTX::BI__nvvm_ldg_ui2:
17848 case NVPTX::BI__nvvm_ldg_ui4:
17849 case NVPTX::BI__nvvm_ldg_ul:
17850 case NVPTX::BI__nvvm_ldg_ull:
17851 case NVPTX::BI__nvvm_ldg_ull2:
17852 // PTX Interoperability section 2.2: "For a vector with an even number of
17853 // elements, its alignment is set to number of elements times the alignment
17854 // of its member: n*alignof(t)."
17855 return MakeLdg(Intrinsic::nvvm_ldg_global_i);
17856 case NVPTX::BI__nvvm_ldg_f:
17857 case NVPTX::BI__nvvm_ldg_f2:
17858 case NVPTX::BI__nvvm_ldg_f4:
17859 case NVPTX::BI__nvvm_ldg_d:
17860 case NVPTX::BI__nvvm_ldg_d2:
17861 return MakeLdg(Intrinsic::nvvm_ldg_global_f);
17862
17863 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
17864 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
17865 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
17866 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
17867 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
17868 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
17869 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
17870 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
17871 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
17872 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
17873 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
17874 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
17875 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
17876 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
17877 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
17878 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
17879 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
17880 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
17881 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
17882 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
17883 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
17884 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
17885 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
17886 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
17887 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
17888 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
17889 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
17890 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
17891 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
17892 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
17893 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
17894 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
17895 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
17896 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
17897 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
17898 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
17899 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
17900 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
17901 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
17902 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
17903 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
17904 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
17905 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
17906 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
17907 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
17908 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
17909 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
17910 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
17911 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
17912 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
17913 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
17914 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
17915 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
17916 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
17917 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
17918 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
17919 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
17920 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
17921 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
17922 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
17923 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
17924 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
17925 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
17926 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
17927 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
17928 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
17929 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
17930 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
17931 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
17932 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
17933 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
17934 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
17935 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
17936 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
17937 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
17938 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
17939 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
17940 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
17941 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
17942 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
17943 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
17944 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
17945 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
17946 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
17947 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
17948 Value *Ptr = EmitScalarExpr(E->getArg(0));
17949 llvm::Type *ElemTy =
17950 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
17951 return Builder.CreateCall(
17952 CGM.getIntrinsic(
17953 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
17954 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
17955 }
17956 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
17957 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
17958 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
17959 Value *Ptr = EmitScalarExpr(E->getArg(0));
17960 llvm::Type *ElemTy =
17961 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
17962 return Builder.CreateCall(
17963 CGM.getIntrinsic(
17964 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
17965 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
17966 }
17967 case NVPTX::BI__nvvm_match_all_sync_i32p:
17968 case NVPTX::BI__nvvm_match_all_sync_i64p: {
17969 Value *Mask = EmitScalarExpr(E->getArg(0));
17970 Value *Val = EmitScalarExpr(E->getArg(1));
17971 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
17972 Value *ResultPair = Builder.CreateCall(
17973 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
17974 ? Intrinsic::nvvm_match_all_sync_i32p
17975 : Intrinsic::nvvm_match_all_sync_i64p),
17976 {Mask, Val});
17977 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
17978 PredOutPtr.getElementType());
17979 Builder.CreateStore(Pred, PredOutPtr);
17980 return Builder.CreateExtractValue(ResultPair, 0);
17981 }
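// Usage sketch (hypothetical source):
//   int pred;
//   unsigned mask_out = __nvvm_match_all_sync_i32p(mask, value, &pred);
// The intrinsic yields an {i32, i1} pair: the i1 predicate is zero-extended
// and stored through the pointer argument, and the i32 member is returned
// as the builtin's value.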
17982
17983 // FP MMA loads
17984 case NVPTX::BI__hmma_m16n16k16_ld_a:
17985 case NVPTX::BI__hmma_m16n16k16_ld_b:
17986 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
17987 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
17988 case NVPTX::BI__hmma_m32n8k16_ld_a:
17989 case NVPTX::BI__hmma_m32n8k16_ld_b:
17990 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
17991 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
17992 case NVPTX::BI__hmma_m8n32k16_ld_a:
17993 case NVPTX::BI__hmma_m8n32k16_ld_b:
17994 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
17995 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
17996 // Integer MMA loads.
17997 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
17998 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
17999 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
18000 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
18001 case NVPTX::BI__imma_m16n16k16_ld_c:
18002 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
18003 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
18004 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
18005 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
18006 case NVPTX::BI__imma_m32n8k16_ld_c:
18007 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
18008 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
18009 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
18010 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
18011 case NVPTX::BI__imma_m8n32k16_ld_c:
18012 // Sub-integer MMA loads.
18013 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
18014 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
18015 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
18016 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
18017 case NVPTX::BI__imma_m8n8k32_ld_c:
18018 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
18019 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
18020 case NVPTX::BI__bmma_m8n8k128_ld_c:
18021 // Double MMA loads.
18022 case NVPTX::BI__dmma_m8n8k4_ld_a:
18023 case NVPTX::BI__dmma_m8n8k4_ld_b:
18024 case NVPTX::BI__dmma_m8n8k4_ld_c:
18025 // Alternate float MMA loads.
18026 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
18027 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
18028 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
18029 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
18030 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
18031 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
18032 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
18033 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
18034 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
18035 Address Dst = EmitPointerWithAlignment(E->getArg(0));
18036 Value *Src = EmitScalarExpr(E->getArg(1));
18037 Value *Ldm = EmitScalarExpr(E->getArg(2));
18038 Optional<llvm::APSInt> isColMajorArg =
18039 E->getArg(3)->getIntegerConstantExpr(getContext());
18040 if (!isColMajorArg)
18041 return nullptr;
18042 bool isColMajor = isColMajorArg->getSExtValue();
18043 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
18044 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
18045 if (IID == 0)
18046 return nullptr;
18047
18048 Value *Result =
18049 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
18050
18051 // Save returned values.
18052 assert(II.NumResults);
18053 if (II.NumResults == 1) {
18054 Builder.CreateAlignedStore(Result, Dst.getPointer(),
18055 CharUnits::fromQuantity(4));
18056 } else {
18057 for (unsigned i = 0; i < II.NumResults; ++i) {
18058 Builder.CreateAlignedStore(
18059 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
18060 Dst.getElementType()),
18061 Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
18062 llvm::ConstantInt::get(IntTy, i)),
18063 CharUnits::fromQuantity(4));
18064 }
18065 }
18066 return Result;
18067 }
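// E.g. __hmma_m16n16k16_ld_a returns 8 <2 x half> fragments, which the loop
// above writes out one GEP'd element at a time; single-fragment loads (such
// as the b1 variants) take the direct-store path. Both paths use the fixed
// 4-byte fragment alignment seen above.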
18068
18069 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
18070 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
18071 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
18072 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
18073 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
18074 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
18075 case NVPTX::BI__imma_m16n16k16_st_c_i32:
18076 case NVPTX::BI__imma_m32n8k16_st_c_i32:
18077 case NVPTX::BI__imma_m8n32k16_st_c_i32:
18078 case NVPTX::BI__imma_m8n8k32_st_c_i32:
18079 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
18080 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
18081 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
18082 Value *Dst = EmitScalarExpr(E->getArg(0));
18083 Address Src = EmitPointerWithAlignment(E->getArg(1));
18084 Value *Ldm = EmitScalarExpr(E->getArg(2));
18085 Optional<llvm::APSInt> isColMajorArg =
18086 E->getArg(3)->getIntegerConstantExpr(getContext());
18087 if (!isColMajorArg)
18088 return nullptr;
18089 bool isColMajor = isColMajorArg->getSExtValue();
18090 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
18091 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
18092 if (IID == 0)
18093 return nullptr;
18094 Function *Intrinsic =
18095 CGM.getIntrinsic(IID, Dst->getType());
18096 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
18097 SmallVector<Value *, 10> Values = {Dst};
18098 for (unsigned i = 0; i < II.NumResults; ++i) {
18099 Value *V = Builder.CreateAlignedLoad(
18100 Src.getElementType(),
18101 Builder.CreateGEP(Src.getElementType(), Src.getPointer(),
18102 llvm::ConstantInt::get(IntTy, i)),
18103 CharUnits::fromQuantity(4));
18104 Values.push_back(Builder.CreateBitCast(V, ParamType));
18105 }
18106 Values.push_back(Ldm);
18107 Value *Result = Builder.CreateCall(Intrinsic, Values);
18108 return Result;
18109 }
18110
18111 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
18112 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
18113 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
18114 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
18115 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
18116 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
18117 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
18118 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
18119 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
18120 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
18121 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
18122 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
18123 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
18124 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
18125 case NVPTX::BI__imma_m16n16k16_mma_s8:
18126 case NVPTX::BI__imma_m16n16k16_mma_u8:
18127 case NVPTX::BI__imma_m32n8k16_mma_s8:
18128 case NVPTX::BI__imma_m32n8k16_mma_u8:
18129 case NVPTX::BI__imma_m8n32k16_mma_s8:
18130 case NVPTX::BI__imma_m8n32k16_mma_u8:
18131 case NVPTX::BI__imma_m8n8k32_mma_s4:
18132 case NVPTX::BI__imma_m8n8k32_mma_u4:
18133 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
18134 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
18135 case NVPTX::BI__dmma_m8n8k4_mma_f64:
18136 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
18137 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
18138 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
18139 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
18140 Address Dst = EmitPointerWithAlignment(E->getArg(0));
18141 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
18142 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
18143 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
18144 Optional<llvm::APSInt> LayoutArg =
18145 E->getArg(4)->getIntegerConstantExpr(getContext());
18146 if (!LayoutArg)
18147 return nullptr;
18148 int Layout = LayoutArg->getSExtValue();
18149 if (Layout < 0 || Layout > 3)
18150 return nullptr;
18151 llvm::APSInt SatfArg;
18152 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
18153 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
18154 SatfArg = 0; // .b1 does not have satf argument.
18155 else if (Optional<llvm::APSInt> OptSatfArg =
18156 E->getArg(5)->getIntegerConstantExpr(getContext()))
18157 SatfArg = *OptSatfArg;
18158 else
18159 return nullptr;
18160 bool Satf = SatfArg.getSExtValue();
18161 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
18162 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
18163 if (IID == 0) // Unsupported combination of Layout/Satf.
18164 return nullptr;
18165
18166 SmallVector<Value *, 24> Values;
18167 Function *Intrinsic = CGM.getIntrinsic(IID);
18168 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
18169 // Load A
18170 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
18171 Value *V = Builder.CreateAlignedLoad(
18172 SrcA.getElementType(),
18173 Builder.CreateGEP(SrcA.getElementType(), SrcA.getPointer(),
18174 llvm::ConstantInt::get(IntTy, i)),
18175 CharUnits::fromQuantity(4));
18176 Values.push_back(Builder.CreateBitCast(V, AType));
18177 }
18178 // Load B
18179 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
18180 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
18181 Value *V = Builder.CreateAlignedLoad(
18182 SrcB.getElementType(),
18183 Builder.CreateGEP(SrcB.getElementType(), SrcB.getPointer(),
18184 llvm::ConstantInt::get(IntTy, i)),
18185 CharUnits::fromQuantity(4));
18186 Values.push_back(Builder.CreateBitCast(V, BType));
18187 }
18188 // Load C
18189 llvm::Type *CType =
18190 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
18191 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
18192 Value *V = Builder.CreateAlignedLoad(
18193 SrcC.getElementType(),
18194 Builder.CreateGEP(SrcC.getElementType(), SrcC.getPointer(),
18195 llvm::ConstantInt::get(IntTy, i)),
18196 CharUnits::fromQuantity(4));
18197 Values.push_back(Builder.CreateBitCast(V, CType));
18198 }
18199 Value *Result = Builder.CreateCall(Intrinsic, Values);
18200 llvm::Type *DType = Dst.getElementType();
18201 for (unsigned i = 0; i < MI.NumEltsD; ++i)
18202 Builder.CreateAlignedStore(
18203 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
18204 Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
18205 llvm::ConstantInt::get(IntTy, i)),
18206 CharUnits::fromQuantity(4));
18207 return Result;
18208 }
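// The layout argument (0..3) encodes the row/column-major choice for the A
// and B operands; together with the satf flag it indexes the per-builtin
// intrinsic table, so e.g. a col/col saturating variant maps to a different
// nvvm_wmma_* intrinsic than row/row without saturation. Unsupported
// (Layout, Satf) pairs yield IID == 0 above and the builtin is rejected.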
18209 default:
18210 return nullptr;
18211 }
18212}
18213
18214namespace {
18215struct BuiltinAlignArgs {
18216 llvm::Value *Src = nullptr;
18217 llvm::Type *SrcType = nullptr;
18218 llvm::Value *Alignment = nullptr;
18219 llvm::Value *Mask = nullptr;
18220 llvm::IntegerType *IntType = nullptr;
18221
18222 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
18223 QualType AstType = E->getArg(0)->getType();
18224 if (AstType->isArrayType())
18225 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).getPointer();
18226 else
18227 Src = CGF.EmitScalarExpr(E->getArg(0));
18228 SrcType = Src->getType();
18229 if (SrcType->isPointerTy()) {
18230 IntType = IntegerType::get(
18231 CGF.getLLVMContext(),
18232 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
18233 } else {
18234 assert(SrcType->isIntegerTy());
18235 IntType = cast<llvm::IntegerType>(SrcType);
18236 }
18237 Alignment = CGF.EmitScalarExpr(E->getArg(1));
18238 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
18239 auto *One = llvm::ConstantInt::get(IntType, 1);
18240 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
18241 }
18242};
18243} // namespace
18244
18245 /// Generate (x & (y-1)) == 0.
18246 RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
18247 BuiltinAlignArgs Args(E, *this);
18248 llvm::Value *SrcAddress = Args.Src;
18249 if (Args.SrcType->isPointerTy())
18250 SrcAddress =
18251 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
18252 return RValue::get(Builder.CreateICmpEQ(
18253 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
18254 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
18255}
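// Worked example: __builtin_is_aligned(p, 16) lowers, in rough IR terms, to
//   %set_bits   = and i64 %src_addr, 15        ; mask = 16 - 1
//   %is_aligned = icmp eq i64 %set_bits, 0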
18256
18257/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
18258/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
18259/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
18260/// TODO: actually use ptrmask once most optimization passes know about it.
18261RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
18262 BuiltinAlignArgs Args(E, *this);
18263 llvm::Value *SrcAddr = Args.Src;
18264 if (Args.Src->getType()->isPointerTy())
18265 SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr");
18266 llvm::Value *SrcForMask = SrcAddr;
18267 if (AlignUp) {
18268 // When aligning up we have to first add the mask to ensure we go over the
18269 // next alignment value and then align down to the next valid multiple.
18270 // By adding the mask, we ensure that align_up on an already aligned
18271 // value will not change the value.
18272 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
18273 }
18274 // Invert the mask to only clear the lower bits.
18275 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
18276 llvm::Value *Result =
18277 Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
18278 if (Args.Src->getType()->isPointerTy()) {
18279 /// TODO: Use ptrmask instead of ptrtoint+gep once it is optimized well.
18280 // Result = Builder.CreateIntrinsic(
18281 // Intrinsic::ptrmask, {Args.SrcType, SrcForMask->getType(), Args.IntType},
18282 // {SrcForMask, NegatedMask}, nullptr, "aligned_result");
18283 Result->setName("aligned_intptr");
18284 llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff");
18285 // The result must point to the same underlying allocation. This means we
18286 // can use an inbounds GEP to enable better optimization.
18287 Value *Base = EmitCastToVoidPtr(Args.Src);
18288 if (getLangOpts().isSignedOverflowDefined())
18289 Result = Builder.CreateGEP(Int8Ty, Base, Difference, "aligned_result");
18290 else
18291 Result = EmitCheckedInBoundsGEP(Int8Ty, Base, Difference,
18292 /*SignedIndices=*/true,
18293 /*isSubtraction=*/!AlignUp,
18294 E->getExprLoc(), "aligned_result");
18295 Result = Builder.CreatePointerCast(Result, Args.SrcType);
18296 // Emit an alignment assumption to ensure that the new alignment is
18297 // propagated to loads/stores, etc.
18298 emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
18299 }
18300 assert(Result->getType() == Args.SrcType);
18301 return RValue::get(Result);
18302}
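// Worked example of the arithmetic above: __builtin_align_up(x, 16) computes
// (x + 15) & ~15, while __builtin_align_down(x, 16) computes x & ~15. For
// pointers the math is done on the integer value and the signed difference
// is re-applied through a GEP so the result provably stays within the
// original allocation.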
18303
18304 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
18305 const CallExpr *E) {
18306 switch (BuiltinID) {
18307 case WebAssembly::BI__builtin_wasm_memory_size: {
18308 llvm::Type *ResultType = ConvertType(E->getType());
18309 Value *I = EmitScalarExpr(E->getArg(0));
18310 Function *Callee =
18311 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
18312 return Builder.CreateCall(Callee, I);
18313 }
18314 case WebAssembly::BI__builtin_wasm_memory_grow: {
18315 llvm::Type *ResultType = ConvertType(E->getType());
18316 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
18317 EmitScalarExpr(E->getArg(1))};
18318 Function *Callee =
18319 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
18320 return Builder.CreateCall(Callee, Args);
18321 }
18322 case WebAssembly::BI__builtin_wasm_tls_size: {
18323 llvm::Type *ResultType = ConvertType(E->getType());
18324 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
18325 return Builder.CreateCall(Callee);
18326 }
18327 case WebAssembly::BI__builtin_wasm_tls_align: {
18328 llvm::Type *ResultType = ConvertType(E->getType());
18329 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
18330 return Builder.CreateCall(Callee);
18331 }
18332 case WebAssembly::BI__builtin_wasm_tls_base: {
18333 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
18334 return Builder.CreateCall(Callee);
18335 }
18336 case WebAssembly::BI__builtin_wasm_throw: {
18337 Value *Tag = EmitScalarExpr(E->getArg(0));
18338 Value *Obj = EmitScalarExpr(E->getArg(1));
18339 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
18340 return Builder.CreateCall(Callee, {Tag, Obj});
18341 }
18342 case WebAssembly::BI__builtin_wasm_rethrow: {
18343 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
18344 return Builder.CreateCall(Callee);
18345 }
18346 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
18347 Value *Addr = EmitScalarExpr(E->getArg(0));
18348 Value *Expected = EmitScalarExpr(E->getArg(1));
18349 Value *Timeout = EmitScalarExpr(E->getArg(2));
18350 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
18351 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
18352 }
18353 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
18354 Value *Addr = EmitScalarExpr(E->getArg(0));
18355 Value *Expected = EmitScalarExpr(E->getArg(1));
18356 Value *Timeout = EmitScalarExpr(E->getArg(2));
18357 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
18358 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
18359 }
18360 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
18361 Value *Addr = EmitScalarExpr(E->getArg(0));
18362 Value *Count = EmitScalarExpr(E->getArg(1));
18363 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
18364 return Builder.CreateCall(Callee, {Addr, Count});
18365 }
18366 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
18367 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
18368 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
18369 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
18370 Value *Src = EmitScalarExpr(E->getArg(0));
18371 llvm::Type *ResT = ConvertType(E->getType());
18372 Function *Callee =
18373 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
18374 return Builder.CreateCall(Callee, {Src});
18375 }
18376 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
18377 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
18378 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
18379 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
18380 Value *Src = EmitScalarExpr(E->getArg(0));
18381 llvm::Type *ResT = ConvertType(E->getType());
18382 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
18383 {ResT, Src->getType()});
18384 return Builder.CreateCall(Callee, {Src});
18385 }
18386 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
18387 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
18388 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
18389 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
18390 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
18391 Value *Src = EmitScalarExpr(E->getArg(0));
18392 llvm::Type *ResT = ConvertType(E->getType());
18393 Function *Callee =
18394 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
18395 return Builder.CreateCall(Callee, {Src});
18396 }
18397 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
18398 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
18399 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
18400 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
18401 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
18402 Value *Src = EmitScalarExpr(E->getArg(0));
18403 llvm::Type *ResT = ConvertType(E->getType());
18404 Function *Callee =
18405 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
18406 return Builder.CreateCall(Callee, {Src});
18407 }
18408 case WebAssembly::BI__builtin_wasm_min_f32:
18409 case WebAssembly::BI__builtin_wasm_min_f64:
18410 case WebAssembly::BI__builtin_wasm_min_f32x4:
18411 case WebAssembly::BI__builtin_wasm_min_f64x2: {
18412 Value *LHS = EmitScalarExpr(E->getArg(0));
18413 Value *RHS = EmitScalarExpr(E->getArg(1));
18414 Function *Callee =
18415 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
18416 return Builder.CreateCall(Callee, {LHS, RHS});
18417 }
18418 case WebAssembly::BI__builtin_wasm_max_f32:
18419 case WebAssembly::BI__builtin_wasm_max_f64:
18420 case WebAssembly::BI__builtin_wasm_max_f32x4:
18421 case WebAssembly::BI__builtin_wasm_max_f64x2: {
18422 Value *LHS = EmitScalarExpr(E->getArg(0));
18423 Value *RHS = EmitScalarExpr(E->getArg(1));
18424 Function *Callee =
18425 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
18426 return Builder.CreateCall(Callee, {LHS, RHS});
18427 }
18428 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
18429 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
18430 Value *LHS = EmitScalarExpr(E->getArg(0));
18431 Value *RHS = EmitScalarExpr(E->getArg(1));
18432 Function *Callee =
18433 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
18434 return Builder.CreateCall(Callee, {LHS, RHS});
18435 }
18436 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
18437 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
18438 Value *LHS = EmitScalarExpr(E->getArg(0));
18439 Value *RHS = EmitScalarExpr(E->getArg(1));
18440 Function *Callee =
18441 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
18442 return Builder.CreateCall(Callee, {LHS, RHS});
18443 }
18444 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
18445 case WebAssembly::BI__builtin_wasm_floor_f32x4:
18446 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
18447 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
18448 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
18449 case WebAssembly::BI__builtin_wasm_floor_f64x2:
18450 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
18451 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
18452 unsigned IntNo;
18453 switch (BuiltinID) {
18454 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
18455 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
18456 IntNo = Intrinsic::ceil;
18457 break;
18458 case WebAssembly::BI__builtin_wasm_floor_f32x4:
18459 case WebAssembly::BI__builtin_wasm_floor_f64x2:
18460 IntNo = Intrinsic::floor;
18461 break;
18462 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
18463 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
18464 IntNo = Intrinsic::trunc;
18465 break;
18466 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
18467 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
18468 IntNo = Intrinsic::nearbyint;
18469 break;
18470 default:
18471 llvm_unreachable("unexpected builtin ID");
18472 }
18473 Value *Value = EmitScalarExpr(E->getArg(0));
18474 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
18475 return Builder.CreateCall(Callee, Value);
18476 }
18477 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
18478 Value *Src = EmitScalarExpr(E->getArg(0));
18479 Value *Indices = EmitScalarExpr(E->getArg(1));
18480 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
18481 return Builder.CreateCall(Callee, {Src, Indices});
18482 }
18483 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
18484 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
18485 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
18486 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
18487 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
18488 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
18489 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
18490 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
18491 unsigned IntNo;
18492 switch (BuiltinID) {
18493 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
18494 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
18495 IntNo = Intrinsic::sadd_sat;
18496 break;
18497 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
18498 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
18499 IntNo = Intrinsic::uadd_sat;
18500 break;
18501 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
18502 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
18503 IntNo = Intrinsic::wasm_sub_sat_signed;
18504 break;
18505 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
18506 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
18507 IntNo = Intrinsic::wasm_sub_sat_unsigned;
18508 break;
18509 default:
18510 llvm_unreachable("unexpected builtin ID");
18511 }
18512 Value *LHS = EmitScalarExpr(E->getArg(0));
18513 Value *RHS = EmitScalarExpr(E->getArg(1));
18514 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
18515 return Builder.CreateCall(Callee, {LHS, RHS});
18516 }
18517 case WebAssembly::BI__builtin_wasm_abs_i8x16:
18518 case WebAssembly::BI__builtin_wasm_abs_i16x8:
18519 case WebAssembly::BI__builtin_wasm_abs_i32x4:
18520 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
18521 Value *Vec = EmitScalarExpr(E->getArg(0));
18522 Value *Neg = Builder.CreateNeg(Vec, "neg");
18523 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
18524 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
18525 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
18526 }
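// A sketch of the compare/select lowering used here (no @llvm.abs):
//   %neg = sub <16 x i8> zeroinitializer, %vec
//   %cmp = icmp slt <16 x i8> %vec, zeroinitializer
//   %abs = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %vec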
18527 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
18528 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
18529 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
18530 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
18531 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
18532 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
18533 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
18534 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
18535 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
18536 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
18537 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
18538 case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
18539 Value *LHS = EmitScalarExpr(E->getArg(0));
18540 Value *RHS = EmitScalarExpr(E->getArg(1));
18541 Value *ICmp;
18542 switch (BuiltinID) {
18543 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
18544 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
18545 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
18546 ICmp = Builder.CreateICmpSLT(LHS, RHS);
18547 break;
18548 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
18549 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
18550 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
18551 ICmp = Builder.CreateICmpULT(LHS, RHS);
18552 break;
18553 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
18554 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
18555 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
18556 ICmp = Builder.CreateICmpSGT(LHS, RHS);
18557 break;
18558 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
18559 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
18560 case WebAssembly::BI__builtin_wasm_max_u_i32x4:
18561 ICmp = Builder.CreateICmpUGT(LHS, RHS);
18562 break;
18563 default:
18564 llvm_unreachable("unexpected builtin ID");
18565 }
18566 return Builder.CreateSelect(ICmp, LHS, RHS);
18567 }
18568 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
18569 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
18570 Value *LHS = EmitScalarExpr(E->getArg(0));
18571 Value *RHS = EmitScalarExpr(E->getArg(1));
18572 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
18573 ConvertType(E->getType()));
18574 return Builder.CreateCall(Callee, {LHS, RHS});
18575 }
18576 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
18577 Value *LHS = EmitScalarExpr(E->getArg(0));
18578 Value *RHS = EmitScalarExpr(E->getArg(1));
18579 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
18580 return Builder.CreateCall(Callee, {LHS, RHS});
18581 }
18582 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
18583 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
18584 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
18585 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
18586 Value *Vec = EmitScalarExpr(E->getArg(0));
18587 unsigned IntNo;
18588 switch (BuiltinID) {
18589 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
18590 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
18591 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
18592 break;
18593 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
18594 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
18595 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
18596 break;
18597 default:
18598 llvm_unreachable("unexpected builtin ID");
18599 }
18600
18601 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
18602 return Builder.CreateCall(Callee, Vec);
18603 }
18604 case WebAssembly::BI__builtin_wasm_bitselect: {
18605 Value *V1 = EmitScalarExpr(E->getArg(0));
18606 Value *V2 = EmitScalarExpr(E->getArg(1));
18607 Value *C = EmitScalarExpr(E->getArg(2));
18608 Function *Callee =
18609 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
18610 return Builder.CreateCall(Callee, {V1, V2, C});
18611 }
18612 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
18613 Value *LHS = EmitScalarExpr(E->getArg(0));
18614 Value *RHS = EmitScalarExpr(E->getArg(1));
18615 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
18616 return Builder.CreateCall(Callee, {LHS, RHS});
18617 }
18618 case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
18619 Value *Vec = EmitScalarExpr(E->getArg(0));
18620 Function *Callee =
18621 CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
18622 return Builder.CreateCall(Callee, {Vec});
18623 }
18624 case WebAssembly::BI__builtin_wasm_any_true_v128:
18625 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
18626 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
18627 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
18628 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
18629 unsigned IntNo;
18630 switch (BuiltinID) {
18631 case WebAssembly::BI__builtin_wasm_any_true_v128:
18632 IntNo = Intrinsic::wasm_anytrue;
18633 break;
18634 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
18635 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
18636 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
18637 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
18638 IntNo = Intrinsic::wasm_alltrue;
18639 break;
18640 default:
18641 llvm_unreachable("unexpected builtin ID");
18642 }
18643 Value *Vec = EmitScalarExpr(E->getArg(0));
18644 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
18645 return Builder.CreateCall(Callee, {Vec});
18646 }
18647 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
18648 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
18649 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
18650 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
18651 Value *Vec = EmitScalarExpr(E->getArg(0));
18652 Function *Callee =
18653 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
18654 return Builder.CreateCall(Callee, {Vec});
18655 }
18656 case WebAssembly::BI__builtin_wasm_abs_f32x4:
18657 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
18658 Value *Vec = EmitScalarExpr(E->getArg(0));
18659 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
18660 return Builder.CreateCall(Callee, {Vec});
18661 }
18662 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
18663 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
18664 Value *Vec = EmitScalarExpr(E->getArg(0));
18665 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
18666 return Builder.CreateCall(Callee, {Vec});
18667 }
18668 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
18669 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
18670 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
18671 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
18672 Value *Low = EmitScalarExpr(E->getArg(0));
18673 Value *High = EmitScalarExpr(E->getArg(1));
18674 unsigned IntNo;
18675 switch (BuiltinID) {
18676 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
18677 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
18678 IntNo = Intrinsic::wasm_narrow_signed;
18679 break;
18680 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
18681 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
18682 IntNo = Intrinsic::wasm_narrow_unsigned;
18683 break;
18684 default:
18685 llvm_unreachable("unexpected builtin ID");
18686 }
18687 Function *Callee =
18688 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
18689 return Builder.CreateCall(Callee, {Low, High});
18690 }
18691 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
18692 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
18693 Value *Vec = EmitScalarExpr(E->getArg(0));
18694 unsigned IntNo;
18695 switch (BuiltinID) {
18696 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
18697 IntNo = Intrinsic::fptosi_sat;
18698 break;
18699 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
18700 IntNo = Intrinsic::fptoui_sat;
18701 break;
18702 default:
18703 llvm_unreachable("unexpected builtin ID");
18704 }
18705 llvm::Type *SrcT = Vec->getType();
18706 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
18707 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
18708 Value *Trunc = Builder.CreateCall(Callee, Vec);
18709 Value *Splat = Constant::getNullValue(TruncT);
18710 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
18711 }
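// Sketch of the zero-widening trick above for the f64x2 source:
//   %trunc = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %v)
//   %res   = shufflevector <2 x i32> %trunc, <2 x i32> zeroinitializer,
//                          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// i.e. the two saturated lanes are widened to i32x4 with zeroed top lanes.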
18712 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
18713 Value *Ops[18];
18714 size_t OpIdx = 0;
18715 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
18716 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
18717 while (OpIdx < 18) {
18718 Optional<llvm::APSInt> LaneConst =
18719 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
18720 assert(LaneConst && "Constant arg isn't actually constant?");
18721 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
18722 }
18723 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
18724 return Builder.CreateCall(Callee, Ops);
18725 }
18726 case WebAssembly::BI__builtin_wasm_fma_f32x4:
18727 case WebAssembly::BI__builtin_wasm_fms_f32x4:
18728 case WebAssembly::BI__builtin_wasm_fma_f64x2:
18729 case WebAssembly::BI__builtin_wasm_fms_f64x2: {
18730 Value *A = EmitScalarExpr(E->getArg(0));
18731 Value *B = EmitScalarExpr(E->getArg(1));
18732 Value *C = EmitScalarExpr(E->getArg(2));
18733 unsigned IntNo;
18734 switch (BuiltinID) {
18735 case WebAssembly::BI__builtin_wasm_fma_f32x4:
18736 case WebAssembly::BI__builtin_wasm_fma_f64x2:
18737 IntNo = Intrinsic::wasm_fma;
18738 break;
18739 case WebAssembly::BI__builtin_wasm_fms_f32x4:
18740 case WebAssembly::BI__builtin_wasm_fms_f64x2:
18741 IntNo = Intrinsic::wasm_fms;
18742 break;
18743 default:
18744 llvm_unreachable("unexpected builtin ID");
18745 }
18746 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
18747 return Builder.CreateCall(Callee, {A, B, C});
18748 }
18749 case WebAssembly::BI__builtin_wasm_laneselect_i8x16:
18750 case WebAssembly::BI__builtin_wasm_laneselect_i16x8:
18751 case WebAssembly::BI__builtin_wasm_laneselect_i32x4:
18752 case WebAssembly::BI__builtin_wasm_laneselect_i64x2: {
18753 Value *A = EmitScalarExpr(E->getArg(0));
18754 Value *B = EmitScalarExpr(E->getArg(1));
18755 Value *C = EmitScalarExpr(E->getArg(2));
18756 Function *Callee =
18757 CGM.getIntrinsic(Intrinsic::wasm_laneselect, A->getType());
18758 return Builder.CreateCall(Callee, {A, B, C});
18759 }
18760 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
18761 Value *Src = EmitScalarExpr(E->getArg(0));
18762 Value *Indices = EmitScalarExpr(E->getArg(1));
18763 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
18764 return Builder.CreateCall(Callee, {Src, Indices});
18765 }
18766 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
18767 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
18768 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
18769 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
18770 Value *LHS = EmitScalarExpr(E->getArg(0));
18771 Value *RHS = EmitScalarExpr(E->getArg(1));
18772 unsigned IntNo;
18773 switch (BuiltinID) {
18774 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
18775 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
18776 IntNo = Intrinsic::wasm_relaxed_min;
18777 break;
18778 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
18779 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
18780 IntNo = Intrinsic::wasm_relaxed_max;
18781 break;
18782 default:
18783 llvm_unreachable("unexpected builtin ID");
18784 }
18785 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
18786 return Builder.CreateCall(Callee, {LHS, RHS});
18787 }
18788 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
18789 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
18790 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
18791 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
18792 Value *Vec = EmitScalarExpr(E->getArg(0));
18793 unsigned IntNo;
18794 switch (BuiltinID) {
18795 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
18796 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
18797 break;
18798 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
18799 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
18800 break;
18801 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
18802 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
18803 break;
18804 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
18805 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
18806 break;
18807 default:
18808 llvm_unreachable("unexpected builtin ID");
18809 }
18810 Function *Callee = CGM.getIntrinsic(IntNo);
18811 return Builder.CreateCall(Callee, {Vec});
18812 }
18813 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
18814 Value *LHS = EmitScalarExpr(E->getArg(0));
18815 Value *RHS = EmitScalarExpr(E->getArg(1));
18816 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
18817 return Builder.CreateCall(Callee, {LHS, RHS});
18818 }
18819 case WebAssembly::BI__builtin_wasm_dot_i8x16_i7x16_s_i16x8: {
18820 Value *LHS = EmitScalarExpr(E->getArg(0));
18821 Value *RHS = EmitScalarExpr(E->getArg(1));
18822 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot_i8x16_i7x16_signed);
18823 return Builder.CreateCall(Callee, {LHS, RHS});
18824 }
18825 case WebAssembly::BI__builtin_wasm_dot_i8x16_i7x16_add_s_i32x4: {
18826 Value *LHS = EmitScalarExpr(E->getArg(0));
18827 Value *RHS = EmitScalarExpr(E->getArg(1));
18828 Value *Acc = EmitScalarExpr(E->getArg(2));
18829 Function *Callee =
18830 CGM.getIntrinsic(Intrinsic::wasm_dot_i8x16_i7x16_add_signed);
18831 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
18832 }
18833 default:
18834 return nullptr;
18835 }
18836}
18837
18838 static std::pair<Intrinsic::ID, unsigned>
18839 getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
18840 struct Info {
18841 unsigned BuiltinID;
18842 Intrinsic::ID IntrinsicID;
18843 unsigned VecLen;
18844 };
18845 Info Infos[] = {
18846#define CUSTOM_BUILTIN_MAPPING(x,s) \
18847 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
18848 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
18849 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
18850 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
18851 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
18852 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
18853 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
18854 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
18855 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
18856 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
18857 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
18858 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
18859 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
18860 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
18861 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
18862 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
18863 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
18864 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
18865 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
18866 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
18867 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
18868 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
18869 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
18870 // Legacy builtins that take a vector in place of a vector predicate.
18871 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
18872 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
18873 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
18874 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
18875 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
18876 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
18877 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
18878 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
18879#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
18880#undef CUSTOM_BUILTIN_MAPPING
18881 };
18882
18883 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
18884 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
18885 (void)SortOnce;
18886
18887 const Info *F = std::lower_bound(std::begin(Infos), std::end(Infos),
18888 Info{BuiltinID, 0, 0}, CmpInfo);
18889 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
18890 return {Intrinsic::not_intrinsic, 0};
18891
18892 return {F->IntrinsicID, F->VecLen};
18893}
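// The table is sorted by BuiltinID exactly once (via the static SortOnce
// initializer), so the lookup above is a plain binary search. E.g. a query
// for BI__builtin_HEXAGON_L2_loadri_pci would return
// {Intrinsic::hexagon_L2_loadri_pci, 0}, while an unmapped builtin returns
// {Intrinsic::not_intrinsic, 0}.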
18894
18895 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
18896 const CallExpr *E) {
18897 Intrinsic::ID ID;
18898 unsigned VecLen;
18899 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
18900
18901 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
18902 // The base pointer is passed by address, so it needs to be loaded.
18903 Address A = EmitPointerWithAlignment(E->getArg(0));
18904 Address BP = Address(Builder.CreateBitCast(
18905 A.getPointer(), Int8PtrPtrTy), Int8PtrTy, A.getAlignment());
18906 llvm::Value *Base = Builder.CreateLoad(BP);
18907 // The treatment of both loads and stores is the same: the arguments for
18908 // the builtin are the same as the arguments for the intrinsic.
18909 // Load:
18910 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
18911 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
18912 // Store:
18913 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
18914 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
18915 SmallVector<llvm::Value*,5> Ops = { Base };
18916 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
18917 Ops.push_back(EmitScalarExpr(E->getArg(i)));
18918
18919 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
18920 // The load intrinsics generate two results (Value, NewBase), stores
18921 // generate one (NewBase). The new base address needs to be stored.
18922 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
18923 : Result;
18924 llvm::Value *LV = Builder.CreateBitCast(
18925 EmitScalarExpr(E->getArg(0)), NewBase->getType()->getPointerTo());
18926 Address Dest = EmitPointerWithAlignment(E->getArg(0));
18927 llvm::Value *RetVal =
18928 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
18929 if (IsLoad)
18930 RetVal = Builder.CreateExtractValue(Result, 0);
18931 return RetVal;
18932 };
18933
18934 // Handle the conversion of bit-reverse load intrinsics to bitcode.
18935 // The intrinsic call after this function only reads from memory; the
18936 // write to memory is handled by the store instruction.
18937 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
18938 // The intrinsic generates one result, which is the new value for the base
18939 // pointer. It needs to be returned. The result of the load instruction is
18940 // passed to the intrinsic by address, so the value needs to be stored.
18941 llvm::Value *BaseAddress =
18942 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
18943
18944 // Expressions like &(*pt++) will be incremented per evaluation.
18945 // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
18946 // per call.
18947 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
18948 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy),
18949 Int8Ty, DestAddr.getAlignment());
18950 llvm::Value *DestAddress = DestAddr.getPointer();
18951
18952 // Operands are Base, Dest, Modifier.
18953 // The intrinsic format in LLVM IR is defined as
18954 // { ValueType, i8* } (i8*, i32).
18955 llvm::Value *Result = Builder.CreateCall(
18956 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
18957
18958 // The value needs to be stored as the variable is passed by reference.
18959 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
18960
18961 // The store needs to be truncated to fit the destination type.
18962 // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
18963 // to be handled with stores of the respective destination type.
18964 DestVal = Builder.CreateTrunc(DestVal, DestTy);
18965
18966 llvm::Value *DestForStore =
18967 Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo());
18968 Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment());
18969 // The updated value of the base pointer is returned.
18970 return Builder.CreateExtractValue(Result, 1);
18971 };
18972
18973 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
18974 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
18975 : Intrinsic::hexagon_V6_vandvrt;
18976 return Builder.CreateCall(CGM.getIntrinsic(ID),
18977 {Vec, Builder.getInt32(-1)});
18978 };
18979 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
18980 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
18981 : Intrinsic::hexagon_V6_vandqrt;
18982 return Builder.CreateCall(CGM.getIntrinsic(ID),
18983 {Pred, Builder.getInt32(-1)});
18984 };
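// V2Q/Q2V bridge plain HVX vectors and vector predicates: roughly,
//   Q   = V6_vandvrt(Vec, -1)   // vector -> predicate
//   Vec = V6_vandqrt(Q, -1)     // predicate -> vector
// with the all-ones scalar selecting every byte lane, and the _128B
// intrinsic variants chosen when VecLen is 128.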
18985
18986 switch (BuiltinID) {
18987 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
18988 // and the corresponding C/C++ builtins use loads/stores to update
18989 // the predicate.
18990 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
18991 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
18992 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
18993 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
18994 // Get the type from the 0-th argument.
18995 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
18996 Address PredAddr = Builder.CreateElementBitCast(
18997 EmitPointerWithAlignment(E->getArg(2)), VecType);
18998 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
18999 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
19000 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
19001
19002 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
19003 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
19004 PredAddr.getAlignment());
19005 return Builder.CreateExtractValue(Result, 0);
19006 }
19007
19008 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
19009 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
19010 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
19011 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
19012 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
19013 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
19014 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
19015 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
19016 SmallVector<llvm::Value*,4> Ops;
19017 const Expr *PredOp = E->getArg(0);
19018 // There will be an implicit cast to a boolean vector. Strip it.
19019 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
19020 if (Cast->getCastKind() == CK_BitCast)
19021 PredOp = Cast->getSubExpr();
19022 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
19023 }
19024 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
19025 Ops.push_back(EmitScalarExpr(E->getArg(i)));
19026 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
19027 }
19028
19029 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
19030 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
19031 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
19032 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
19033 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
19034 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
19035 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
19036 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
19037 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
19038 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
19039 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
19040 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
19041 return MakeCircOp(ID, /*IsLoad=*/true);
19042 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
19043 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
19044 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
19045 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
19046 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
19047 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
19048 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
19049 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
19050 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
19051 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
19052 return MakeCircOp(ID, /*IsLoad=*/false);
19053 case Hexagon::BI__builtin_brev_ldub:
19054 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
19055 case Hexagon::BI__builtin_brev_ldb:
19056 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
19057 case Hexagon::BI__builtin_brev_lduh:
19058 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
19059 case Hexagon::BI__builtin_brev_ldh:
19060 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
19061 case Hexagon::BI__builtin_brev_ldw:
19062 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
19063 case Hexagon::BI__builtin_brev_ldd:
19064 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
19065 } // switch
19066
19067 return nullptr;
19068}
19069
19070 Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
19071 const CallExpr *E,
19072 ReturnValueSlot ReturnValue) {
19073 SmallVector<Value *, 4> Ops;
19074 llvm::Type *ResultType = ConvertType(E->getType());
19075
19076 // Find out if any arguments are required to be integer constant expressions.
19077 unsigned ICEArguments = 0;
19078 ASTContext::GetBuiltinTypeError Error;
19079 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
19080 if (Error == ASTContext::GE_Missing_type) {
19081 // Vector intrinsics don't have a type string.
19082 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
19083 BuiltinID <= clang::RISCV::LastRVVBuiltin);
19084 ICEArguments = 0;
19085 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
19086 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
19087 ICEArguments = 1 << 1;
19088 } else {
19089 assert(Error == ASTContext::GE_None && "Unexpected error");
19090 }
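// ICEArguments is a bitmask: bit i set means argument i must be an integer
// constant expression. E.g. for vget_v/vset_v only bit 1 is set (1 << 1),
// so only the index operand is constant-folded by the loop below; all other
// operands are emitted as ordinary scalars.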
19091
19092 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
19093 // If this is a normal argument, just emit it as a scalar.
19094 if ((ICEArguments & (1 << i)) == 0) {
19095 Ops.push_back(EmitScalarExpr(E->getArg(i)));
19096 continue;
19097 }
19098
19099 // If this is required to be a constant, constant fold it so that we know
19100 // that the generated intrinsic gets a ConstantInt.
19101 Ops.push_back(llvm::ConstantInt::get(
19102 getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
19103 }
19104
19105 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19106 unsigned NF = 1;
19107 constexpr unsigned TAIL_UNDISTURBED = 0;
19108
19109 // Required for overloaded intrinsics.
19110 llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
19111 switch (BuiltinID) {
19112 default: llvm_unreachable("unexpected builtin ID");
19113 case RISCV::BI__builtin_riscv_orc_b_32:
19114 case RISCV::BI__builtin_riscv_orc_b_64:
19115 case RISCV::BI__builtin_riscv_clz_32:
19116 case RISCV::BI__builtin_riscv_clz_64:
19117 case RISCV::BI__builtin_riscv_ctz_32:
19118 case RISCV::BI__builtin_riscv_ctz_64:
19119 case RISCV::BI__builtin_riscv_clmul:
19120 case RISCV::BI__builtin_riscv_clmulh:
19121 case RISCV::BI__builtin_riscv_clmulr:
19122 case RISCV::BI__builtin_riscv_bcompress_32:
19123 case RISCV::BI__builtin_riscv_bcompress_64:
19124 case RISCV::BI__builtin_riscv_bdecompress_32:
19125 case RISCV::BI__builtin_riscv_bdecompress_64:
19126 case RISCV::BI__builtin_riscv_bfp_32:
19127 case RISCV::BI__builtin_riscv_bfp_64:
19128 case RISCV::BI__builtin_riscv_grev_32:
19129 case RISCV::BI__builtin_riscv_grev_64:
19130 case RISCV::BI__builtin_riscv_gorc_32:
19131 case RISCV::BI__builtin_riscv_gorc_64:
19132 case RISCV::BI__builtin_riscv_shfl_32:
19133 case RISCV::BI__builtin_riscv_shfl_64:
19134 case RISCV::BI__builtin_riscv_unshfl_32:
19135 case RISCV::BI__builtin_riscv_unshfl_64:
19136 case RISCV::BI__builtin_riscv_xperm4:
19137 case RISCV::BI__builtin_riscv_xperm8:
19138 case RISCV::BI__builtin_riscv_xperm_n:
19139 case RISCV::BI__builtin_riscv_xperm_b:
19140 case RISCV::BI__builtin_riscv_xperm_h:
19141 case RISCV::BI__builtin_riscv_xperm_w:
19142 case RISCV::BI__builtin_riscv_crc32_b:
19143 case RISCV::BI__builtin_riscv_crc32_h:
19144 case RISCV::BI__builtin_riscv_crc32_w:
19145 case RISCV::BI__builtin_riscv_crc32_d:
19146 case RISCV::BI__builtin_riscv_crc32c_b:
19147 case RISCV::BI__builtin_riscv_crc32c_h:
19148 case RISCV::BI__builtin_riscv_crc32c_w:
19149 case RISCV::BI__builtin_riscv_crc32c_d:
19150 case RISCV::BI__builtin_riscv_fsl_32:
19151 case RISCV::BI__builtin_riscv_fsr_32:
19152 case RISCV::BI__builtin_riscv_fsl_64:
19153 case RISCV::BI__builtin_riscv_fsr_64:
19154 case RISCV::BI__builtin_riscv_brev8:
19155 case RISCV::BI__builtin_riscv_zip_32:
19156 case RISCV::BI__builtin_riscv_unzip_32: {
19157 switch (BuiltinID) {
19158 default: llvm_unreachable("unexpected builtin ID");
19159 // Zbb
19160 case RISCV::BI__builtin_riscv_orc_b_32:
19161 case RISCV::BI__builtin_riscv_orc_b_64:
19162 ID = Intrinsic::riscv_orc_b;
19163 break;
19164 case RISCV::BI__builtin_riscv_clz_32:
19165 case RISCV::BI__builtin_riscv_clz_64: {
19166 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
19167 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
19168 }
19169 case RISCV::BI__builtin_riscv_ctz_32:
19170 case RISCV::BI__builtin_riscv_ctz_64: {
19171 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
19172 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
19173 }
19174
19175 // Zbc
19176 case RISCV::BI__builtin_riscv_clmul:
19177 ID = Intrinsic::riscv_clmul;
19178 break;
19179 case RISCV::BI__builtin_riscv_clmulh:
19180 ID = Intrinsic::riscv_clmulh;
19181 break;
19182 case RISCV::BI__builtin_riscv_clmulr:
19183 ID = Intrinsic::riscv_clmulr;
19184 break;
19185
19186 // Zbe
19187 case RISCV::BI__builtin_riscv_bcompress_32:
19188 case RISCV::BI__builtin_riscv_bcompress_64:
19189 ID = Intrinsic::riscv_bcompress;
19190 break;
19191 case RISCV::BI__builtin_riscv_bdecompress_32:
19192 case RISCV::BI__builtin_riscv_bdecompress_64:
19193 ID = Intrinsic::riscv_bdecompress;
19194 break;
19195
19196 // Zbf
19197 case RISCV::BI__builtin_riscv_bfp_32:
19198 case RISCV::BI__builtin_riscv_bfp_64:
19199 ID = Intrinsic::riscv_bfp;
19200 break;
19201
19202 // Zbp
19203 case RISCV::BI__builtin_riscv_grev_32:
19204 case RISCV::BI__builtin_riscv_grev_64:
19205 ID = Intrinsic::riscv_grev;
19206 break;
19207 case RISCV::BI__builtin_riscv_gorc_32:
19208 case RISCV::BI__builtin_riscv_gorc_64:
19209 ID = Intrinsic::riscv_gorc;
19210 break;
19211 case RISCV::BI__builtin_riscv_shfl_32:
19212 case RISCV::BI__builtin_riscv_shfl_64:
19213 ID = Intrinsic::riscv_shfl;
19214 break;
19215 case RISCV::BI__builtin_riscv_unshfl_32:
19216 case RISCV::BI__builtin_riscv_unshfl_64:
19217 ID = Intrinsic::riscv_unshfl;
19218 break;
19219 case RISCV::BI__builtin_riscv_xperm_n:
19220 ID = Intrinsic::riscv_xperm_n;
19221 break;
19222 case RISCV::BI__builtin_riscv_xperm_b:
19223 ID = Intrinsic::riscv_xperm_b;
19224 break;
19225 case RISCV::BI__builtin_riscv_xperm_h:
19226 ID = Intrinsic::riscv_xperm_h;
19227 break;
19228 case RISCV::BI__builtin_riscv_xperm_w:
19229 ID = Intrinsic::riscv_xperm_w;
19230 break;
19231
19232 // Zbr
19233 case RISCV::BI__builtin_riscv_crc32_b:
19234 ID = Intrinsic::riscv_crc32_b;
19235 break;
19236 case RISCV::BI__builtin_riscv_crc32_h:
19237 ID = Intrinsic::riscv_crc32_h;
19238 break;
19239 case RISCV::BI__builtin_riscv_crc32_w:
19240 ID = Intrinsic::riscv_crc32_w;
19241 break;
19242 case RISCV::BI__builtin_riscv_crc32_d:
19243 ID = Intrinsic::riscv_crc32_d;
19244 break;
19245 case RISCV::BI__builtin_riscv_crc32c_b:
19246 ID = Intrinsic::riscv_crc32c_b;
19247 break;
19248 case RISCV::BI__builtin_riscv_crc32c_h:
19249 ID = Intrinsic::riscv_crc32c_h;
19250 break;
19251 case RISCV::BI__builtin_riscv_crc32c_w:
19252 ID = Intrinsic::riscv_crc32c_w;
19253 break;
19254 case RISCV::BI__builtin_riscv_crc32c_d:
19255 ID = Intrinsic::riscv_crc32c_d;
19256 break;
19257
19258 // Zbt
19259 case RISCV::BI__builtin_riscv_fsl_32:
19260 case RISCV::BI__builtin_riscv_fsl_64:
19261 ID = Intrinsic::riscv_fsl;
19262 break;
19263 case RISCV::BI__builtin_riscv_fsr_32:
19264 case RISCV::BI__builtin_riscv_fsr_64:
19265 ID = Intrinsic::riscv_fsr;
19266 break;
19267
19268 // Zbkx
19269 case RISCV::BI__builtin_riscv_xperm8:
19270 ID = Intrinsic::riscv_xperm8;
19271 break;
19272 case RISCV::BI__builtin_riscv_xperm4:
19273 ID = Intrinsic::riscv_xperm4;
19274 break;
19275
19276 // Zbkb
19277 case RISCV::BI__builtin_riscv_brev8:
19278 ID = Intrinsic::riscv_brev8;
19279 break;
19280 case RISCV::BI__builtin_riscv_zip_32:
19281 ID = Intrinsic::riscv_zip;
19282 break;
19283 case RISCV::BI__builtin_riscv_unzip_32:
19284 ID = Intrinsic::riscv_unzip;
19285 break;
19286 }
19287
19288 IntrinsicTypes = {ResultType};
19289 break;
19290 }
19291
19292 // Zk builtins
19293
19294 // Zknd
19295 case RISCV::BI__builtin_riscv_aes32dsi_32:
19296 ID = Intrinsic::riscv_aes32dsi;
19297 break;
19298 case RISCV::BI__builtin_riscv_aes32dsmi_32:
19299 ID = Intrinsic::riscv_aes32dsmi;
19300 break;
19301 case RISCV::BI__builtin_riscv_aes64ds_64:
19302 ID = Intrinsic::riscv_aes64ds;
19303 break;
19304 case RISCV::BI__builtin_riscv_aes64dsm_64:
19305 ID = Intrinsic::riscv_aes64dsm;
19306 break;
19307 case RISCV::BI__builtin_riscv_aes64im_64:
19308 ID = Intrinsic::riscv_aes64im;
19309 break;
19310
19311 // Zkne
19312 case RISCV::BI__builtin_riscv_aes32esi_32:
19313 ID = Intrinsic::riscv_aes32esi;
19314 break;
19315 case RISCV::BI__builtin_riscv_aes32esmi_32:
19316 ID = Intrinsic::riscv_aes32esmi;
19317 break;
19318 case RISCV::BI__builtin_riscv_aes64es_64:
19319 ID = Intrinsic::riscv_aes64es;
19320 break;
19321 case RISCV::BI__builtin_riscv_aes64esm_64:
19322 ID = Intrinsic::riscv_aes64esm;
19323 break;
19324
19325 // Zknd & Zkne
19326 case RISCV::BI__builtin_riscv_aes64ks1i_64:
19327 ID = Intrinsic::riscv_aes64ks1i;
19328 break;
19329 case RISCV::BI__builtin_riscv_aes64ks2_64:
19330 ID = Intrinsic::riscv_aes64ks2;
19331 break;
19332
19333 // Zknh
19334 case RISCV::BI__builtin_riscv_sha256sig0:
19335 ID = Intrinsic::riscv_sha256sig0;
19336 IntrinsicTypes = {ResultType};
19337 break;
19338 case RISCV::BI__builtin_riscv_sha256sig1:
19339 ID = Intrinsic::riscv_sha256sig1;
19340 IntrinsicTypes = {ResultType};
19341 break;
19342 case RISCV::BI__builtin_riscv_sha256sum0:
19343 ID = Intrinsic::riscv_sha256sum0;
19344 IntrinsicTypes = {ResultType};
19345 break;
19346 case RISCV::BI__builtin_riscv_sha256sum1:
19347 ID = Intrinsic::riscv_sha256sum1;
19348 IntrinsicTypes = {ResultType};
19349 break;
19350 case RISCV::BI__builtin_riscv_sha512sig0_64:
19351 ID = Intrinsic::riscv_sha512sig0;
19352 break;
19353 case RISCV::BI__builtin_riscv_sha512sig0h_32:
19354 ID = Intrinsic::riscv_sha512sig0h;
19355 break;
19356 case RISCV::BI__builtin_riscv_sha512sig0l_32:
19357 ID = Intrinsic::riscv_sha512sig0l;
19358 break;
19359 case RISCV::BI__builtin_riscv_sha512sig1_64:
19360 ID = Intrinsic::riscv_sha512sig1;
19361 break;
19362 case RISCV::BI__builtin_riscv_sha512sig1h_32:
19363 ID = Intrinsic::riscv_sha512sig1h;
19364 break;
19365 case RISCV::BI__builtin_riscv_sha512sig1l_32:
19366 ID = Intrinsic::riscv_sha512sig1l;
19367 break;
19368 case RISCV::BI__builtin_riscv_sha512sum0_64:
19369 ID = Intrinsic::riscv_sha512sum0;
19370 break;
19371 case RISCV::BI__builtin_riscv_sha512sum0r_32:
19372 ID = Intrinsic::riscv_sha512sum0r;
19373 break;
19374 case RISCV::BI__builtin_riscv_sha512sum1_64:
19375 ID = Intrinsic::riscv_sha512sum1;
19376 break;
19377 case RISCV::BI__builtin_riscv_sha512sum1r_32:
19378 ID = Intrinsic::riscv_sha512sum1r;
19379 break;
19380
19381 // Zksed
19382 case RISCV::BI__builtin_riscv_sm4ks:
19383 ID = Intrinsic::riscv_sm4ks;
19384 IntrinsicTypes = {ResultType};
19385 break;
19386 case RISCV::BI__builtin_riscv_sm4ed:
19387 ID = Intrinsic::riscv_sm4ed;
19388 IntrinsicTypes = {ResultType};
19389 break;
19390
19391 // Zksh
19392 case RISCV::BI__builtin_riscv_sm3p0:
19393 ID = Intrinsic::riscv_sm3p0;
19394 IntrinsicTypes = {ResultType};
19395 break;
19396 case RISCV::BI__builtin_riscv_sm3p1:
19397 ID = Intrinsic::riscv_sm3p1;
19398 IntrinsicTypes = {ResultType};
19399 break;
19400
19401 // Vector builtins are handled from here.
19402#include "clang/Basic/riscv_vector_builtin_cg.inc"
19403 }
19404
19405 assert(ID != Intrinsic::not_intrinsic);
19406
19407 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
19408 return Builder.CreateCall(F, Ops, "");
19409}
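A minimal usage sketch (hypothetical example, not part of CGBuiltin.cpp; it
assumes a RISC-V target with the Zbb extension enabled, e.g.
--target=riscv32 -march=rv32i_zbb): the clz_32 case above bypasses intrinsic
ID selection and instead emits a direct call to the generic llvm.ctlz
intrinsic, passing false as the second operand so that a zero input is well
defined.

// clz_example.cpp (hypothetical)
int leading_zeros(int x) {
  // Lowers to: %0 = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
  return __builtin_riscv_clz_32(x);
}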
Defines the clang::ASTContext interface.
#define V(N, I)
DynTypedNode Node
StringRef P
static SVal getValue(SVal val, SValBuilder &svalBuilder)
#define X86_CPU_SUBTYPE(ENUM, STR)
#define X86_VENDOR(ENUM, STRING)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
#define X86_CPU_TYPE(ENUM, STR)
auto * N
static const Builtin::Info BuiltinInfo[]
Definition Builtins.cpp:21
llvm::Error Error
llvm::Expected< T > Expected
static void Accumulate(SMap &SM, CFGBlock *B)
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node.
static char bitActionToX86BTCode(BitTest::ActionKind A)
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
static Value * EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering)
#define INTRINSIC_X86_XSAVE_ID(NAME)
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p,...
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
#define MMA_VARIANTS_B1_AND(geom, type)
static bool AArch64SISDIntrinsicsProvenSorted
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl * > &Seen)
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
static std::pair< Intrinsic::ID, unsigned > getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID)
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
@ UnsignedAlts
@ Vectorize1ArgType
@ FpCmpzModifiers
@ Use64BitVectors
@ VectorizeArgTypes
@ VectorRetGetArgs01
@ InventFloatType
@ AddRetType
@ Add2ArgTypes
@ VectorizeRetType
@ VectorRet
@ Add1ArgType
@ Use128BitVectors
#define MMA_INTR(geom_op_type, layout)
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
static Value * emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
static bool AArch64SVEIntrinsicsProvenSorted
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
#define MUTATE_LDBL(func)
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
#define MMA_VARIANTS(geom, type)
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
constexpr unsigned SVEBitsPerBlock
#define NEONMAP0(NameBase)
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static Value * emitBinaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
static Value * emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based Intrinsic::ID and the expression node,...
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
static Optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
#define MMA_SATF_VARIANTS(geom, type)
static int64_t clamp(int64_t Value, int64_t Low, int64_t High)
Definition CGBuiltin.cpp:64
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to @llvm.fabs().
#define CUSTOM_BUILTIN_MAPPING(x, s)
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
static bool isSpecialUnsignedMultiplySignedResult(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
static llvm::Value * emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
static Optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
static Value * emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
static llvm::ScalableVectorType * getSVEVectorForElementType(llvm::Type *EltTy)
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID)
#define INTRINSIC_WITH_CC(NAME)
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes)
Definition CGBuiltin.cpp:68
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
SpecialRegisterAccessKind
@ VolatileRead
@ NormalRead
@ Write
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft's _InterlockedCompareExchange* i...
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
static Optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size.
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static bool NEONSIMDIntrinsicsProvenSorted
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
static Value * emitTernaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
#define MMA_LDST(n, geom_op_type)
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
static RValue EmitCheckedUnsignedMultiplySignedResult(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
static Value * emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
static Value * EmitX86CvtBF16ToFloatExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops)
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
#define MMA_VARIANTS_B1_XOR(geom, type)
#define MMA_VARIANTS_I4(geom, type)
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
static bool AArch64SIMDIntrinsicsProvenSorted
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
CodeGenFunction::ComplexPairTy ComplexPairTy
static CompilationDatabasePluginRegistry::Add< FixedCompilationDatabasePlugin > X("fixed-compilation-database", "Reads plain-text flags file")
TokenType getType() const
Returns the token's type, e.g.
FormatToken * Next
The next token in the unwrapped line.
unsigned Offset
Definition Format.cpp:2579
#define ALIAS(NAME, TOK, FLAGS)
static bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
Definition Interp.cpp:34
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
Enumerates target-specific builtins in their own namespaces within namespace clang.
SourceLocation Begin
__DEVICE__ float modf(float __x, float *__iptr)
__DEVICE__ double nan(const char *)
__device__ __2f16 float bool s
APSInt & getInt()
Definition APValue.h:415
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:209
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
unsigned getIntWidth(QualType T) const
CanQualType VoidPtrTy
IdentifierTable & Idents
Definition ASTContext.h:659
Builtin::Context & BuiltinInfo
Definition ASTContext.h:661
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
TypeInfo getTypeInfo(const Type *T) const
Get the size and alignment of the specified complete type in bits.
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArrayType::ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
QualType getObjCIdType() const
Represents the Objective-CC id type.
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
const TargetInfo & getTargetInfo() const
Definition ASTContext.h:772
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
@ GE_None
No error.
@ GE_Missing_type
Missing a type.
unsigned getTargetAddressSpace(QualType T) const
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e....
Definition Builtins.h:137
const char * getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition Builtins.h:89
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition Expr.h:2801
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition Expr.h:2992
FunctionDecl * getDirectCallee()
If the callee is a FunctionDecl, return it. Otherwise return null.
Definition Expr.h:2971
Expr * getCallee()
Definition Expr.h:2951
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition Expr.h:2979
arg_range arguments()
Definition Expr.h:3040
QualType getCallReturnType(const ASTContext &Ctx) const
getCallReturnType - Get the return type of the call expr.
Definition Expr.cpp:1493
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:183
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:179
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
XRayInstrSet XRayInstrumentationBundle
Set of XRay instrumentation kinds to emit.
virtual bool allowBFloatArgsAndRet() const
Definition ABIInfo.h:59
An aligned address.
Definition Address.h:74
static Address invalid()
Definition Address.h:90
CharUnits getAlignment() const
Return the alignment of this pointer.
Definition Address.h:120
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:104
llvm::Value * getPointer() const
Definition Address.h:93
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:99
An aggregate value slot.
Definition CGValue.h:491
Address getAddress() const
Definition CGValue.h:627
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:99
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition CGBuilder.h:106
llvm::AtomicRMWInst * CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, llvm::Value *Ptr, llvm::Value *Val, llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition CGBuilder.h:150
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition CGBuilder.h:341
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:71
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition CGBuilder.h:89
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition CGBuilder.h:158
llvm::AtomicCmpXchgInst * CreateAtomicCmpXchg(llvm::Value *Ptr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition CGBuilder.h:138
Address CreateGEP(Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition CGBuilder.h:264
virtual std::string getDeviceSideName(const NamedDecl *ND)=0
Returns function or variable name on device side even if the current compilation is for host.
virtual llvm::GlobalVariable * getThrowInfo(QualType T)
Definition CGCXXABI.h:253
All available information about a concrete callee.
Definition CGCall.h:60
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition CGCall.h:130
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
CGFunctionInfo - Class to encapsulate the information about a function definition.
virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, llvm::Value *Size)=0
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E)
CallArgList - Type for representing both the value and type of arguments in a call.
Definition CGCall.h:259
void add(RValue rvalue, QualType type)
Definition CGCall.h:283
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
llvm::Value * EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr)
std::pair< RValue, llvm::Value * > EmitAtomicCompareExchange(LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, llvm::AtomicOrdering Success=llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering Failure=llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak=false, AggValueSlot Slot=AggValueSlot::ignored())
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
llvm::CallInst * EmitTrapCall(llvm::Intrinsic::ID IntrID)
Emit a call to trap or debugtrap and attach function attribute "trap-func-name" if specified.
SanitizerSet SanOpts
Sanitizers enabled for this function.
RValue EmitBuiltinIsAligned(const CallExpr *E)
Emit IR for __builtin_is_aligned.
LValue EmitAggExprToLValue(const Expr *E)
EmitAggExprToLValue - Emit the computation of the specified expression of aggregate type into a tempo...
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum)
Create a check for a function parameter that may potentially be declared as non-null.
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr)
void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
CleanupKind getARCCleanupKind()
Retrieves the default cleanup kind for an ARC cleanup.
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm....
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Value * EmitSEHExceptionInfo()
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp)
Emit IR for __builtin_align_up/__builtin_align_down.
const LangOptions & getLangOpts() const
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void EmitUnreachable(SourceLocation Loc)
Emit a reached-unreachable diagnostic if Loc is valid and runtime checking is enabled.
ComplexPairTy EmitComplexExpr(const Expr *E, bool IgnoreReal=false, bool IgnoreImag=false)
EmitComplexExpr - Emit the computation of the specified expression of complex type,...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **callOrInvoke, bool IsMustTail, SourceLocation Loc)
EmitCall - Generate a call of the given function, expecting the given result type,...
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Type * ConvertTypeForMem(QualType T)
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
bool AlwaysEmitXRayCustomEvents() const
AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit XRay custom event handling c...
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
@ Default
! No language constraints on evaluation order.
const TargetInfo & getTarget() const
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Value * EmitSEHExceptionCode()
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
EmitTargetBuiltinExpr - Emit the given builtin call.
void pushCleanupAfterFullExpr(CleanupKind Kind, As... A)
Queue a cleanup to be pushed after finishing the current full-expression, potentially with an active ...
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID)
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E)
Address EmitArrayToPointerDecay(const Expr *Array, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
void EmitCheck(ArrayRef< std::pair< llvm::Value *, SanitizerMask > > Checked, SanitizerHandler Check, ArrayRef< llvm::Constant * > StaticArgs, ArrayRef< llvm::Value * > DynamicArgs)
Create a basic block that will either trap or call a handler function in the UBSan runtime with the p...
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, const CallExpr *TheCallExpr, bool IsDelete)
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
llvm::Constant * EmitCheckSourceLocation(SourceLocation Loc)
Emit a description of a source location in a format suitable for passing to a runtime sanitizer handl...
void ErrorUnsupported(const Stmt *S, const char *Type)
ErrorUnsupported - Print out an error that codegen doesn't support the specified stmt yet.
Address EmitVAListRef(const Expr *E)
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue=nullptr)
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
bool ShouldXRayInstrumentFunction() const
ShouldXRayInstrument - Return true if the current function should be instrumented with XRay nop sleds...
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation.
RValue EmitOpenMPDevicePrintfCallExpr(const CallExpr *E)
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside presereved access index region.
llvm::Value * EmitARCRetain(QualType type, llvm::Value *value)
bool AlwaysEmitXRayTypedEvents() const
AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit XRay typed event handling ...
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
CGCallee EmitCallee(const Expr *E)
Address CreateMemTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl)
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertType(QualType T)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitCastToVoidPtr(llvm::Value *value)
Emit a cast to void* in the appropriate address space.
void EmitARCIntrinsicUse(ArrayRef< llvm::Value * > values)
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E)
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address EmitMSVAListRef(const Expr *E)
Emit a "reference" to a __builtin_ms_va_list; this is always the value of the expression,...
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
bool ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitSEHAbnormalTermination()
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
RValue GetUndefRValue(QualType Ty)
GetUndefRValue - Get an appropriate 'undef' rvalue for the given type.
LValue EmitLValue(const Expr *E)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
llvm::Value * EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
llvm::Value * EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location, const AnnotateAttr *Attr)
Emit an annotation call (intrinsic).
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
This class organizes the cross-function state that is used while generating LLVM code.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition CGBuiltin.cpp:93
void ErrorUnsupported(const Stmt *S, const char *Type)
Print out an error that codegen doesn't support the specified stmt yet.
CGCUDARuntime & getCUDARuntime()
Return a reference to the configured CUDA runtime.
CGOpenCLRuntime & getOpenCLRuntime()
Return a reference to the configured OpenCL runtime.
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys=None)
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
llvm::Constant * GetFunctionStart(const ValueDecl *Decl)
const llvm::Triple & getTriple() const
void DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo)
DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag.
llvm::Constant * CreateRuntimeVariable(llvm::Type *Ty, StringRef Name)
Create a new runtime global variable with the specified type and name.
TBAAAccessInfo getTBAAAccessInfo(QualType AccessType)
getTBAAAccessInfo - Get TBAA information that describes an access to an object of the given type.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
CharUnits getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
llvm::LLVMContext & getLLVMContext()
CGObjCRuntime & getObjCRuntime()
Return a reference to the configured Objective-C runtime.
void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk)
Set the LLVM function attributes (sext, zext, etc).
void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F)
Set the LLVM function attributes which only apply to a function definition.
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition CGCall.cpp:1613
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:666
const CGFunctionInfo & arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args)
Definition CGCall.cpp:653
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded.
Information for lazily generating a cleanup.
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:353
llvm::Value * getBitFieldPointer() const
Definition CGValue.h:393
llvm::Value * getPointer(CodeGenFunction &CGF) const
Definition CGValue.h:337
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:39
static RValue getIgnored()
Definition CGValue.h:84
static RValue get(llvm::Value *V)
Definition CGValue.h:89
static RValue getAggregate(Address addr, bool isVolatile=false)
Definition CGValue.h:110
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition CGValue.h:96
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:61
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition CGCall.h:357
virtual llvm::Value * encodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert the address of an instruction into a return address ...
Definition TargetInfo.h:124
virtual llvm::Value * decodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert a return address as stored by the system into the ac...
Definition TargetInfo.h:114
const ABIInfo & getABIInfo() const
getABIInfo() - Returns ABI info helper for the target.
Definition TargetInfo.h:53
virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const
Determines the DWARF register number for the stack pointer, for exception-handling purposes.
Definition TargetInfo.h:96
Complex values, per C99 6.2.5p11.
Definition Type.h:2659
Represents a concrete matrix type with constant number of rows and columns.
Definition Type.h:3527
T * getAttr() const
Definition DeclBase.h:545
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition DeclBase.cpp:218
bool hasAttr() const
Definition DeclBase.h:549
This represents one expression.
Definition Expr.h:109
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:2958
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition Expr.cpp:2953
bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsFloat - Return true if this is a constant which we can fold and convert to a floating point...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition Expr.cpp:2949
bool isPRValue() const
Definition Expr.h:271
@ NPC_ValueDependentIsNotNull
Specifies that a value-dependent expression should be considered to never be a null pointer constant.
Definition Expr.h:790
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition Expr.h:437
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition Expr.cpp:3421
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition Expr.cpp:2933
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant.
Definition Expr.cpp:3779
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:247
Optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr, bool isEvaluated=true) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition Expr.h:141
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available...
const ValueDecl * getAsBuiltinConstantDeclRef(const ASTContext &Context) const
If this expression is an unambiguous reference to a single declaration, in the style of __builtin_fun...
Definition Expr.cpp:207
Represents a member of a struct/union/class.
Definition Decl.h:2873
Represents a function declaration or definition.
Definition Decl.h:1874
const ParmVarDecl * getParamDecl(unsigned i) const
Definition Decl.h:2537
Represents a prototype with parameter type info, e.g.
Definition Type.h:3962
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:56
const Decl * getDecl() const
Definition GlobalDecl.h:103
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
@ Other
Other implicit parameter.
Definition Decl.h:1640
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:4973
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:274
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PipeType - OpenCL20.
Definition Type.h:6407
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition Type.h:2712
QualType getPointeeType() const
Definition Type.h:2722
A (possibly-)qualified type.
Definition Type.h:731
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition Type.h:6629
LangAS getAddressSpace() const
Return the address space of this type.
Definition Type.h:6680
QualType getCanonicalType() const
Definition Type.h:6598
The collection of all-type qualifiers we support.
Definition Type.h:147
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
MemEltType getMemEltType() const
bool isGatherLoad() const
bool isOverloadCvt() const
EltType getEltType() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isTupleSet() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isOverloadWhile() const
bool isAppendSVALL() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
unsigned getSplatOperand() const
bool isScatterStore() const
bool isReverseCompare() const
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:40
Encodes a location in the source.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:336
Exposes information about the current target.
Definition TargetInfo.h:191
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition TargetInfo.h:266
virtual bool hasLegalHalfType() const
Determine whether _Float16 is supported on this target.
Definition TargetInfo.h:618
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
bool isLittleEndian() const
virtual const char * getClobbers() const =0
Returns a string of target-specific clobbers, in LLVM format.
unsigned getMaxOpenCLWorkGroupSize() const
Definition TargetInfo.h:777
bool isBigEndian() const
virtual bool checkArithmeticFenceSupported() const
Controls if __arithmetic_fence is supported in the targeted backend.
unsigned getSuitableAlign() const
Return the alignment that is the largest alignment ever used for any scalar/SIMD data type on the tar...
Definition TargetInfo.h:645
CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
The base class of the type hierarchy.
Definition Type.h:1556
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition Type.cpp:1759
bool isBlockPointerType() const
Definition Type.h:6815
bool isVoidType() const
Definition Type.h:7096
bool isBooleanType() const
Definition Type.h:7212
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition Type.cpp:2018
bool isArrayType() const
Definition Type.h:6873
bool isPointerType() const
Definition Type.h:6807
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition Type.h:7128
const T * castAs() const
Member-template castAs<specific type>.
Definition Type.h:7369
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:625
bool isBitIntType() const
Definition Type.h:7031
bool isFloatingType() const
Definition Type.cpp:2121
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition Type.cpp:2068
const T * getAs() const
Member-template getAs<specific type>.
Definition Type.h:7302
QualType getType() const
Definition Decl.h:685
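The Type predicates and the getAs<>/castAs<> member templates above follow one convention: getAs<> returns null on mismatch, while castAs<> asserts that the cast is valid. A minimal sketch (hypothetical helper):

#include "clang/AST/Type.h"

// Hypothetical helper: is T (possibly through typedefs) a pointer to an
// integer type?
static bool isPointerToInteger(clang::QualType T) {
  if (const auto *PT = T->getAs<clang::PointerType>())
    return PT->getPointeeType()->isIntegerType();
  return false; // not a pointer; castAs<> here would assert instead
}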
Represents a GCC generic vector type.
Definition Type.h:3301
SmallVector< OSLogBufferItem, 4 > Items
Definition OSLog.h:113
unsigned char getNumArgsByte() const
Definition OSLog.h:148
unsigned char getSummaryByte() const
Definition OSLog.h:139
long int64_t
unsigned long uint64_t
Defines the clang::TargetInfo interface.
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Constant * initializationPatternFor(CodeGenModule &, llvm::Type *)
TypeEvaluationKind
The kind of evaluation to perform on values of a particular type.
@ EHCleanup
Denotes a cleanup that should run when a scope is exited using exceptional control flow (a throw stat...
constexpr XRayInstrMask Typed
Definition XRayInstr.h:42
constexpr XRayInstrMask Custom
Definition XRayInstr.h:41
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition OSLog.cpp:180
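The OSLog helpers fit together as sketched below: compute the buffer layout for a __builtin_os_log_format call, then read the two header bytes. Assumes an ASTContext and the call expression; the helper itself is hypothetical:

#include "clang/AST/Expr.h"
#include "clang/AST/OSLog.h"

// Hypothetical helper: pack the summary and arg-count header bytes,
// or return 0 if the layout cannot be computed.
static unsigned osLogHeader(clang::ASTContext &Ctx, const clang::CallExpr *E) {
  clang::analyze_os_log::OSLogBufferLayout Layout;
  if (!clang::analyze_os_log::computeOSLogBufferLayout(Ctx, E, Layout))
    return 0;
  return (unsigned(Layout.getSummaryByte()) << 8) | Layout.getNumArgsByte();
}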
@ OS
Indicates that the tracking object is a descendant of a reference-counted OSObject,...
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition StoreRef.h:27
bool Sub(InterpState &S, CodePtr OpPC)
Definition Interp.h:142
bool Dup(InterpState &S, CodePtr OpPC)
Definition Interp.h:280
bool Zero(InterpState &S, CodePtr OpPC)
Definition Interp.h:814
unsigned Trunc(InterpState &S, CodePtr OpPC, unsigned Bits, const T &V)
Definition Interp.h:848
bool Load(InterpState &S, CodePtr OpPC)
Definition Interp.h:618
bool Cast(InterpState &S, CodePtr OpPC)
Definition Interp.h:802
bool isa(CodeGen::Address addr)
Definition Address.h:177
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition Specifiers.h:139
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
Expr * Cond
@ C
Languages that the frontend can parse and compile.
@ Asm
Assembly: we accept this only so that we can preprocess it.
@ Result
The result type of a method or function.
U cast(CodeGen::Address addr)
Definition Address.h:174
YAML serialization mapping.
Definition Dominators.h:30
#define true
Definition stdbool.h:21
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
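CodeGenFunction inherits these cached llvm::Type pointers from CodeGenTypeCache, so emission code can write CGF.Int8Ty instead of re-querying the LLVM context. A minimal sketch (hypothetical helper):

#include "CodeGenFunction.h"
#include "llvm/IR/Constants.h"

// Hypothetical helper: a zero-byte constant built from the cached i8 type.
static llvm::Constant *zeroByte(clang::CodeGen::CodeGenFunction &CGF) {
  return llvm::ConstantInt::get(CGF.Int8Ty, 0);
}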
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:612
APValue Val
Val - This is the value the expression can be folded to.
Definition Expr.h:614
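Expr::EvalResult is what constant folding hands back; Val holds the folded APValue. A minimal sketch, assuming an expression and its ASTContext (the helper is hypothetical):

#include "clang/AST/Expr.h"
#include "llvm/ADT/APSInt.h"

// Hypothetical helper: fold E to an integer constant if possible.
static bool tryFoldToInt(const clang::Expr *E, const clang::ASTContext &Ctx,
                         llvm::APSInt &Out) {
  clang::Expr::EvalResult Result;
  if (!E->EvaluateAsRValue(Result, Ctx) || !Result.Val.isInt())
    return false;
  Out = Result.Val.getInt();
  return true;
}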
bool has(SanitizerMask K) const
Check if a certain (single) sanitizer is enabled.
Definition Sanitizers.h:155
bool has(XRayInstrMask K) const
Definition XRayInstr.h:48
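Both 'has' queries take a single-bit mask. A minimal sketch using the masks named above (helper names hypothetical):

#include "clang/Basic/Sanitizers.h"
#include "clang/Basic/XRayInstr.h"

// Hypothetical helpers: single-mask membership tests.
static bool checksAddressUse(const clang::SanitizerSet &S) {
  return S.has(clang::SanitizerKind::Address);
}
static bool emitsXRayEventCalls(const clang::XRayInstrSet &S) {
  return S.has(clang::XRayInstrKind::Typed) ||
         S.has(clang::XRayInstrKind::Custom);
}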
#define sinh(__x)
Definition tgmath.h:373
#define asin(__x)
Definition tgmath.h:112
#define scalbln(__x, __y)
Definition tgmath.h:1182
#define sqrt(__x)
Definition tgmath.h:520
#define acos(__x)
Definition tgmath.h:83
#define fmin(__x, __y)
Definition tgmath.h:780
#define exp(__x)
Definition tgmath.h:431
#define ilogb(__x)
Definition tgmath.h:851
#define copysign(__x, __y)
Definition tgmath.h:618
#define erf(__x)
Definition tgmath.h:636
#define atanh(__x)
Definition tgmath.h:228
#define remquo(__x, __y, __z)
Definition tgmath.h:1111
#define nextafter(__x, __y)
Definition tgmath.h:1055
#define frexp(__x, __y)
Definition tgmath.h:816
#define asinh(__x)
Definition tgmath.h:199
#define erfc(__x)
Definition tgmath.h:653
#define atan2(__x, __y)
Definition tgmath.h:566
#define nexttoward(__x, __y)
Definition tgmath.h:1073
#define hypot(__x, __y)
Definition tgmath.h:833
#define exp2(__x)
Definition tgmath.h:670
#define sin(__x)
Definition tgmath.h:286
#define cbrt(__x)
Definition tgmath.h:584
#define log2(__x)
Definition tgmath.h:970
#define llround(__x)
Definition tgmath.h:919
#define cosh(__x)
Definition tgmath.h:344
#define trunc(__x)
Definition tgmath.h:1216
#define fmax(__x, __y)
Definition tgmath.h:762
#define ldexp(__x, __y)
Definition tgmath.h:868
#define acosh(__x)
Definition tgmath.h:170
#define tgamma(__x)
Definition tgmath.h:1199
#define scalbn(__x, __y)
Definition tgmath.h:1165
#define round(__x)
Definition tgmath.h:1148
#define fmod(__x, __y)
Definition tgmath.h:798
#define llrint(__x)
Definition tgmath.h:902
#define tan(__x)
Definition tgmath.h:315
#define cos(__x)
Definition tgmath.h:257
#define log10(__x)
Definition tgmath.h:936
#define fabs(__x)
Definition tgmath.h:549
#define pow(__x, __y)
Definition tgmath.h:490
#define log1p(__x)
Definition tgmath.h:953
#define rint(__x)
Definition tgmath.h:1131
#define expm1(__x)
Definition tgmath.h:687
#define remainder(__x, __y)
Definition tgmath.h:1090
#define fdim(__x, __y)
Definition tgmath.h:704
#define lgamma(__x)
Definition tgmath.h:885
#define tanh(__x)
Definition tgmath.h:402
#define lrint(__x)
Definition tgmath.h:1004
#define atan(__x)
Definition tgmath.h:141
#define floor(__x)
Definition tgmath.h:722
#define ceil(__x)
Definition tgmath.h:601
#define log(__x)
Definition tgmath.h:460
#define logb(__x)
Definition tgmath.h:987
#define nearbyint(__x)
Definition tgmath.h:1038
#define lround(__x)
Definition tgmath.h:1021
#define fma(__x, __y, __z)
Definition tgmath.h:742
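The tgmath.h macros above are type-generic: in C they select the float/double/long double variant of each function from the argument type. A minimal, runnable C++ sketch of the same selection, using the <cmath> overload set rather than the C macros:

#include <cmath>
#include <cstdio>

int main() {
  float f = 2.0f;
  double d = 2.0;
  long double ld = 2.0L;
  // Each call resolves to the variant matching its argument type
  // (sqrtf/sqrt/sqrtl in the C tgmath.h spelling).
  std::printf("%f %f %Lf\n", double(std::sqrt(f)), std::sqrt(d),
              std::sqrt(ld));
  return 0;
}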